Kevux Git Server - fll/commitdiff
Bugfix: Combining and Width detection for utf8 are not properly printing.
author Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 02:39:27 +0000 (21:39 -0500)
committer Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 02:41:35 +0000 (21:41 -0500)
The wrong data is being passed to utf8_print_combining_or_width().
Change the behavior to send the correct string to the function.
Move the error printing to a single function and use this function in all such cases.

level_3/utf8/c/common.h
level_3/utf8/c/private-print.c
level_3/utf8/c/private-print.h
level_3/utf8/c/private-utf8_codepoint.c

index 2852a159c3e8e7d24c4eeb4f67925a2321b7188c..8d231fdf0cf25dfa53d91bf72df1ebe01cf73a37 100644 (file)
@@ -297,22 +297,22 @@ extern "C" {
  * Modes used to designate how to the input and output are to be processed.
  *
  * utf8_mode_from_*:
- *   - bytesequence:  The input format is bytesequence.
- *   - codepoint: The input format is codepoint (U+XXXX or U+XXXXXX).
+ *   - bytesequence: The input format is bytesequence.
+ *   - codepoint:    The input format is codepoint (U+XXXX or U+XXXXXX).
  *
  * utf8_mode_to_*:
- *   - bytesequence:  The outout format is bytesequence.
- *   - codepoint: The outout format is codepoint (U+XXXX or U+XXXXXX).
- *   - combining: The outout format is whether or not character is combining (may be used with "width").
- *   - width:     The outout format is how wide the character is (may be used with "combining").
+ *   - bytesequence: The output format is bytesequence.
+ *   - codepoint:    The output format is codepoint (U+XXXX or U+XXXXXX).
+ *   - combining:    The output format is whether or not character is combining (may be used with "width").
+ *   - width:        The output format is how wide the character is (may be used with "combining").
  */
 #ifndef _di_utf8_modes_
-  #define utf8_mode_from_bytesequence_d  0x1
-  #define utf8_mode_from_codepoint_d 0x2
-  #define utf8_mode_to_bytesequence_d    0x4
-  #define utf8_mode_to_codepoint_d   0x8
-  #define utf8_mode_to_combining_d   0x10
-  #define utf8_mode_to_width_d       0x20
+  #define utf8_mode_from_bytesequence_d 0x1
+  #define utf8_mode_from_codepoint_d    0x2
+  #define utf8_mode_to_bytesequence_d   0x4
+  #define utf8_mode_to_codepoint_d      0x8
+  #define utf8_mode_to_combining_d      0x10
+  #define utf8_mode_to_width_d          0x20
 #endif // _di_utf8_modes_
 
 #ifdef __cplusplus
index 4f7a9aa84e0412df66acc22c64ddf0c84c74e942..ea02bfec4e302fb444445f82d30a0d5cdf213ea9 100644 (file)
@@ -82,7 +82,7 @@ extern "C" {
         }
       }
       else if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-        fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+        utf8_print_error_combining_or_width(data);
       }
     }
     else if (data->mode & utf8_mode_to_width_d) {
@@ -91,6 +91,16 @@ extern "C" {
   }
 #endif // _di_utf8_print_combining_or_width_
 
+#ifndef _di_utf8_print_error_combining_or_width_
+  void utf8_print_error_combining_or_width(utf8_data_t * const data) {
+
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+  }
+#endif // _di_utf8_print_error_combining_or_width_
+
 #ifndef _di_utf8_print_error_decode_
   void utf8_print_error_decode(utf8_data_t * const data, const f_status_t status, const f_string_static_t character) {
 
@@ -285,7 +295,7 @@ extern "C" {
     f_status_t status = F_none;
 
     if (data->mode & utf8_mode_to_combining_d) {
-      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+      utf8_print_error_combining_or_width(data);
     }
     else if (data->mode & utf8_mode_to_width_d) {
       const f_string_static_t *character = 0;
@@ -407,9 +417,38 @@ extern "C" {
       }
     }
 
-    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+    utf8_print_error_combining_or_width(data);
+  }
+#endif // _di_utf8_print_width_
+
+#ifndef _di_utf8_print_width_codepoint_
+  void utf8_print_width_codepoint(utf8_data_t * const data, const f_string_static_t character) {
+
+    f_status_t status = f_utf_is_wide(character.string, character.used);
+
+    if (status == F_true) {
+      fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_2_s, data->append);
+
+      return;
     }
+
+    if (status == F_false) {
+      status = f_utf_is_graph(character.string, character.used);
+
+      if (status == F_true) {
+        fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_1_s, data->append);
+
+        return;
+      }
+
+      if (status == F_false) {
+        fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_0_s, data->append);
+
+        return;
+      }
+    }
+
+    utf8_print_error_combining_or_width(data);
   }
 #endif // _di_utf8_print_width_
 
index 8b3b3c8270e337ae4db0f4164c24a9411d2bda09..8afd76b0e812bc479067e448b9507528b88c4c60 100644 (file)
@@ -69,6 +69,16 @@ extern "C" {
 #endif // _di_utf8_print_combining_or_width_
 
 /**
+ * Print an error regarding the width or combining state of some character.
+ *
+ * @param data
+ *   The program data.
+ */
+#ifndef _di_utf8_print_error_combining_or_width_
+  extern void utf8_print_error_combining_or_width(utf8_data_t * const data) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_error_combining_or_width_
+
+/**
  * Print error message when attempt to decode the character failed.
  *
  * @param data
index 20b89a4c9ddb254a2a47c52ae4b2f10e8c787cc1..30cbe37a495e23b22b4e252ba40dad697a641eba 100644 (file)
@@ -48,27 +48,34 @@ extern "C" {
         }
       }
       else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-        if (data->mode & utf8_mode_to_bytesequence_d) {
+        if (data->mode & utf8_mode_to_codepoint_d) {
+          utf8_print_codepoint(data, codepoint);
+        }
+        else {
           f_char_t byte[4] = { 0, 0, 0, 0 };
           f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4);
 
           status = f_utf_unicode_from(codepoint, 4, &character.string);
 
           if (F_status_is_error(status)) {
-            utf8_print_error_encode(data, status, codepoint);
+            if (data->mode & utf8_mode_to_bytesequence_d) {
+              utf8_print_error_encode(data, status, codepoint);
+            }
+            else {
+              utf8_print_error_combining_or_width(data);
+            }
           }
-          else {
+          else if (data->mode & utf8_mode_to_bytesequence_d) {
             status = F_none;
             character.used = macro_f_utf_byte_width(character.string[0]);
 
             utf8_print_bytesequence(data, character);
           }
-        }
-        else if (data->mode & utf8_mode_to_codepoint_d) {
-          utf8_print_codepoint(data, codepoint);
-        }
-        else {
-          utf8_print_combining_or_width(data, character);
+          else {
+            status = F_none;
+
+            utf8_print_combining_or_width(data, character);
+          }
         }
       }
     }