]> Kevux Git Server - fll/commitdiff
Bugfix: Codepoint to Binary is not working.
author Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
committer Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
The wrong variable is being processed.
The codepoint (which is the Unicode representation, such as U+8C78, the codepoint for the character '豸') is being width checked.
The binary character is what should be getting the width check.

A second situation where the codepoint is not being printed at all is with the files.
It seems that I forgot to finish writing this code (another problem caused by my original accidental commit of this project).

While investigating this I saw some opportunities for cleanup.
- Move the width detection into a separate function utf8_process_text_width().
- Use character.string[0] instead of *character.string.
- Rename 'character' to 'current' to make more semantic sense (At the time I wasn't sure what to call it and 'text' was already unavailable).
- The 'text' in private-utf8_codepoint.c is now initialized in a simpler way.

level_3/utf8/c/private-utf8.c
level_3/utf8/c/private-utf8.h
level_3/utf8/c/private-utf8_binary.c
level_3/utf8/c/private-utf8_codepoint.c

index 70d99d668f7b356c2b2c17618dfdea184a1739c6..ccee4d40d1f7ca3c48bffe80d9d4f6e9149fbdb0 100644 (file)
@@ -32,11 +32,13 @@ extern "C" {
     bool valid = F_true;
     uint8_t mode_codepoint = utf8_codepoint_mode_ready;
 
-    f_string_static_t character = macro_f_string_static_t_initialize(text, 4);
+    f_string_static_t current = macro_f_string_static_t_initialize(text, 0);
+
+    utf8_process_text_width(&current);
 
     flockfile(data->file.stream);
 
-    for (uint16_t signal_check = 0; *character.string && F_status_is_error_not(status); ) {
+    for (uint16_t signal_check = 0; current.string[0] && F_status_is_error_not(status); ) {
 
       if (!((++signal_check) % utf8_signal_check_d)) {
         if (utf8_signal_received(data)) {
@@ -48,53 +50,24 @@ extern "C" {
       }
 
       status = F_none;
-      character.used = macro_f_utf_byte_width(*character.string);
-
-      // Re-adjust used if buffer ended before the character is supposed to end.
-      if (character.string[0]) {
-        if (character.used > 1) {
-          if (character.string[1]) {
-            if (character.used > 2) {
-              if (character.string[2]) {
-                if (character.used > 3) {
-                  if (character.string[3]) {
-                    character.used = 4;
-                  }
-                  else {
-                    character.used = 3;
-                  }
-                }
-              }
-              else {
-                character.used = 2;
-              }
-            }
-          }
-          else {
-            character.used = 1;
-          }
-        }
-      }
-      else {
-        character.used = 0;
-      }
 
       if (data->mode & utf8_mode_from_binary_d) {
-        status = utf8_convert_binary(data, character);
+        status = utf8_convert_binary(data, current);
       }
       else {
-        status = utf8_detect_codepoint(data, character, &mode_codepoint);
+        status = utf8_detect_codepoint(data, current, &mode_codepoint);
 
         if (F_status_is_fine(status) && status != F_next) {
-          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          status = utf8_convert_codepoint(data, current, &mode_codepoint);
         }
       }
 
-      character.string += character.used;
-
       if (status == F_utf) {
         valid = F_false;
       }
+
+      current.string += current.used;
+      utf8_process_text_width(&current);
     } // for
 
     if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_binary_d)) {
@@ -107,9 +80,9 @@ extern "C" {
           valid = F_false;
         }
 
-        character.used = 0;
+        current.used = 0;
 
-        status = utf8_convert_codepoint(data, character, &mode_codepoint);
+        status = utf8_convert_codepoint(data, current, &mode_codepoint);
       }
     }
 
@@ -133,6 +106,55 @@ extern "C" {
   }
 #endif // _di_utf8_process_text_
 
+/**
+ * Populate the used and size of the text based on the character at the start of the string.
+ *
+ * Nothing is changed when the first byte of the string is NULL.
+ *
+ * @param text
+ *   The character data.
+ *   The string must not be NULL as its first byte is always read.
+ *
+ *   The size is set to the width reported by macro_f_utf_byte_width() for the first byte.
+ *   The used is set to the number of non-NULL bytes available for the character, never exceeding the size.
+ */
+#ifndef _di_utf8_process_text_width_
+  void utf8_process_text_width(f_string_static_t *text) {
+
+    if (!text->string[0]) {
+      return;
+    }
+
+    text->size = macro_f_utf_byte_width(text->string[0]);
+
+    // The first byte is not NULL at this point, so at least one byte is always used.
+    text->used = 1;
+
+    // Stop before the first NULL so that a caller advancing the string by used never steps beyond the terminator.
+    while (text->used < text->size && text->string[text->used]) {
+      ++text->used;
+    } // while
+  }
+#endif // _di_utf8_process_text_width_
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
index 0490b3ec107ebf0d29ab5d316252142c42f96e9f..618abdae767ad5ad80bcc30ec1e665d9d0434539 100644 (file)
@@ -52,6 +52,19 @@ extern "C" {
   extern f_status_t utf8_process_text(utf8_data_t * const data, const f_string_t text) F_attribute_visibility_internal_d;
 #endif // _di_utf8_process_text_
 
+/**
+ * Populate the text used and size based on the character at the start of the string.
+ *
+ * Nothing is changed when the first byte of the string is NULL.
+ *
+ * @param text
+ *   The character data.
+ *   The string must not be NULL as its first byte is always read.
+ *
+ *   The used represents the actual length of the character in bytes.
+ *   The size represents the expected length of the character in bytes.
+ */
+#ifndef _di_utf8_process_text_width_
+  extern void utf8_process_text_width(f_string_static_t *text) F_attribute_visibility_internal_d;
+#endif // _di_utf8_process_text_width_
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
index fc414ac261d166081354ab4c83102cd12b92607d..781cd007825fc62db8831a136086690deb6d19e0 100644 (file)
@@ -129,7 +129,6 @@ extern "C" {
 
     // Handle last (incomplete) character when the buffer ended before the character is supposed to end.
     if (status != F_signal && next == F_false) {
-
       character.used = j;
 
       if (data->mode & utf8_mode_from_binary_d) {
index 6c8bd7c7c9958c4cd3b1a8b256d4272ef7c1d881..3569b60c50359f1b7880a02624e28319521ad980 100644 (file)
@@ -54,10 +54,7 @@ extern "C" {
       else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) {
         if (data->mode & utf8_mode_to_binary_d) {
           char byte[5] = { 0, 0, 0, 0, 0 };
-          f_string_static_t text = f_string_static_t_initialize;
-          text.string = byte;
-          text.used = macro_f_utf_byte_width(codepoint);
-          text.size = 5;
+          f_string_static_t text = macro_f_string_static_t_initialize(byte, 5);
 
           status = f_utf_unicode_from(codepoint, 4, &text.string);
 
@@ -66,6 +63,7 @@ extern "C" {
           }
           else {
             status = F_none;
+            text.used = macro_f_utf_byte_width(text.string[0]);
 
             fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append);
           }
@@ -189,6 +187,7 @@ extern "C" {
     f_status_t status = F_none;
     bool valid = F_true;
     bool next = F_true;
+    uint8_t mode_codepoint = utf8_codepoint_mode_ready;
     uint16_t signal_check = 0;
 
     f_array_length_t i = 0;
@@ -226,7 +225,16 @@ extern "C" {
         } // for
 
         if (j == character.used) {
-          status = utf8_convert_binary(data, character);
+          if (data->mode & utf8_mode_from_binary_d) {
+            status = utf8_convert_binary(data, character);
+          }
+          else {
+            status = utf8_detect_codepoint(data, character, &mode_codepoint);
+
+            if (F_status_is_fine(status) && status != F_next) {
+              status = utf8_convert_codepoint(data, character, &mode_codepoint);
+            }
+          }
 
           if (status == F_utf) {
             valid = F_false;
@@ -246,7 +254,16 @@ extern "C" {
     if (status != F_signal && next == F_false) {
       character.used = j;
 
-      status = utf8_convert_binary(data, character);
+      if (data->mode & utf8_mode_from_binary_d) {
+        status = utf8_convert_binary(data, character);
+      }
+      else {
+        status = utf8_detect_codepoint(data, character, &mode_codepoint);
+
+        if (F_status_is_fine(status) && status != F_next) {
+          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+        }
+      }
 
       if (status == F_utf) {
         valid = F_false;