Bugfix: Codepoint to Binary is not working.

author Kevin Day <thekevinday@gmail.com>

Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)

committer Kevin Day <thekevinday@gmail.com>

Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
author Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
committer Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
diff --git a/level_3/utf8/c/private-utf8.c b/level_3/utf8/c/private-utf8.c

index 70d99d668f7b356c2b2c17618dfdea184a1739c6..ccee4d40d1f7ca3c48bffe80d9d4f6e9149fbdb0 100644 (file)
--- a/level_3/utf8/c/private-utf8.c
+++ b/level_3/utf8/c/private-utf8.c
@@ -32,11 +32,13 @@ extern "C" {
      bool valid = F_true;
      uint8_t mode_codepoint = utf8_codepoint_mode_ready;
  
-    f_string_static_t character = macro_f_string_static_t_initialize(text, 4);
+    f_string_static_t current = macro_f_string_static_t_initialize(text, 0);
+
+    utf8_process_text_width(&current);
  
      flockfile(data->file.stream);
  
-    for (uint16_t signal_check = 0; *character.string && F_status_is_error_not(status); ) {
+    for (uint16_t signal_check = 0; current.string[0] && F_status_is_error_not(status); ) {
  
        if (!((++signal_check) % utf8_signal_check_d)) {
          if (utf8_signal_received(data)) {
@@ -48,53 +50,24 @@ extern "C" {
        }
  
        status = F_none;
-      character.used = macro_f_utf_byte_width(*character.string);
-
-      // Re-adjust used if buffer ended before the character is supposed to end.
-      if (character.string[0]) {
-        if (character.used > 1) {
-          if (character.string[1]) {
-            if (character.used > 2) {
-              if (character.string[2]) {
-                if (character.used > 3) {
-                  if (character.string[3]) {
-                    character.used = 4;
-                  }
-                  else {
-                    character.used = 3;
-                  }
-                }
-              }
-              else {
-                character.used = 2;
-              }
-            }
-          }
-          else {
-            character.used = 1;
-          }
-        }
-      }
-      else {
-        character.used = 0;
-      }
  
        if (data->mode & utf8_mode_from_binary_d) {
-        status = utf8_convert_binary(data, character);
+        status = utf8_convert_binary(data, current);
        }
        else {
-        status = utf8_detect_codepoint(data, character, &mode_codepoint);
+        status = utf8_detect_codepoint(data, current, &mode_codepoint);
  
          if (F_status_is_fine(status) && status != F_next) {
-          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          status = utf8_convert_codepoint(data, current, &mode_codepoint);
          }
        }
  
-      character.string += character.used;
-
        if (status == F_utf) {
          valid = F_false;
        }
+
+      current.string += current.used;
+      utf8_process_text_width(&current);
      } // for
  
      if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_binary_d)) {
@@ -107,9 +80,9 @@ extern "C" {
            valid = F_false;
          }
  
-        character.used = 0;
+        current.used = 0;
  
-        status = utf8_convert_codepoint(data, character, &mode_codepoint);
+        status = utf8_convert_codepoint(data, current, &mode_codepoint);
        }
      }
  
@@ -133,6 +106,55 @@ extern "C" {
    }
  #endif // _di_utf8_process_text_
  
+#ifndef _di_utf8_process_text_width_
+  void utf8_process_text_width(f_string_static_t *text) {
+
+    // An empty string has no character to measure, so used and size are left exactly as the caller set them.
+    if (!text->string[0]) {
+      return;
+    }
+
+    // The expected width comes from the first (lead) byte alone.
+    // NOTE(review): this assumes macro_f_utf_byte_width() yields a value from 1 to 4, confirm against its definition.
+    text->size = macro_f_utf_byte_width(text->string[0]);
+
+    // The first byte is already known to be non-NULL, so at least one byte is available.
+    text->used = 1;
+
+    // Count the remaining bytes of the character, stopping early at the NULL terminator.
+    // The used must never include the terminator because the caller advances the string by used bytes.
+    while (text->used < text->size && text->string[text->used]) {
+      ++text->used;
+    } // while
+  }
+#endif // _di_utf8_process_text_width_
+
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_3/utf8/c/private-utf8.h b/level_3/utf8/c/private-utf8.h

index 0490b3ec107ebf0d29ab5d316252142c42f96e9f..618abdae767ad5ad80bcc30ec1e665d9d0434539 100644 (file)
--- a/level_3/utf8/c/private-utf8.h
+++ b/level_3/utf8/c/private-utf8.h
@@ -52,6 +52,19 @@ extern "C" {
    extern f_status_t utf8_process_text(utf8_data_t * const data, const f_string_t text) F_attribute_visibility_internal_d;
  #endif // _di_utf8_process_text_
  
+/**
+ * Populate the text used and size based on the string.
+ *
+ * The expected width is determined from the first byte of text->string using macro_f_utf_byte_width().
+ * If the first byte of text->string is NULL, then this returns without changing either the used or the size.
+ *
+ * @param text
+ *   The character data.
+ *   Both text and text->string are dereferenced without a NULL pointer check.
+ *
+ *   The used represents the actual length of the character in bytes.
+ *   The size represents the expected length of the character in bytes.
+ */
+#ifndef _di_utf8_process_text_width_
+  extern void utf8_process_text_width(f_string_static_t *text) F_attribute_visibility_internal_d;
+#endif // _di_utf8_process_text_width_
+
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_3/utf8/c/private-utf8_binary.c b/level_3/utf8/c/private-utf8_binary.c

index fc414ac261d166081354ab4c83102cd12b92607d..781cd007825fc62db8831a136086690deb6d19e0 100644 (file)
--- a/level_3/utf8/c/private-utf8_binary.c
+++ b/level_3/utf8/c/private-utf8_binary.c
@@ -129,7 +129,6 @@ extern "C" {
  
      // Handle last (incomplete) character when the buffer ended before the character is supposed to end.
      if (status != F_signal && next == F_false) {
-
        character.used = j;
  
        if (data->mode & utf8_mode_from_binary_d) {
diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c

index 6c8bd7c7c9958c4cd3b1a8b256d4272ef7c1d881..3569b60c50359f1b7880a02624e28319521ad980 100644 (file)
--- a/level_3/utf8/c/private-utf8_codepoint.c
+++ b/level_3/utf8/c/private-utf8_codepoint.c
@@ -54,10 +54,7 @@ extern "C" {
        else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) {
          if (data->mode & utf8_mode_to_binary_d) {
            char byte[5] = { 0, 0, 0, 0, 0 };
-          f_string_static_t text = f_string_static_t_initialize;
-          text.string = byte;
-          text.used = macro_f_utf_byte_width(codepoint);
-          text.size = 5;
+          f_string_static_t text = macro_f_string_static_t_initialize(byte, 5);
  
            status = f_utf_unicode_from(codepoint, 4, &text.string);
  
@@ -66,6 +63,7 @@ extern "C" {
            }
            else {
              status = F_none;
+            text.used = macro_f_utf_byte_width(text.string[0]);
  
              fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append);
            }
@@ -189,6 +187,7 @@ extern "C" {
      f_status_t status = F_none;
      bool valid = F_true;
      bool next = F_true;
+    uint8_t mode_codepoint = utf8_codepoint_mode_ready;
      uint16_t signal_check = 0;
  
      f_array_length_t i = 0;
@@ -226,7 +225,16 @@ extern "C" {
          } // for
  
          if (j == character.used) {
-          status = utf8_convert_binary(data, character);
+          if (data->mode & utf8_mode_from_binary_d) {
+            status = utf8_convert_binary(data, character);
+          }
+          else {
+            status = utf8_detect_codepoint(data, character, &mode_codepoint);
+
+            if (F_status_is_fine(status) && status != F_next) {
+              status = utf8_convert_codepoint(data, character, &mode_codepoint);
+            }
+          }
  
            if (status == F_utf) {
              valid = F_false;
@@ -246,7 +254,16 @@ extern "C" {
      if (status != F_signal && next == F_false) {
        character.used = j;
  
-      status = utf8_convert_binary(data, character);
+      if (data->mode & utf8_mode_from_binary_d) {
+        status = utf8_convert_binary(data, character);
+      }
+      else {
+        status = utf8_detect_codepoint(data, character, &mode_codepoint);
+
+        if (F_status_is_fine(status) && status != F_next) {
+          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+        }
+      }
  
        if (status == F_utf) {
          valid = F_false;
author	Kevin Day <thekevinday@gmail.com>
	Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
committer	Kevin Day <thekevinday@gmail.com>
	Fri, 10 Dec 2021 01:17:43 +0000 (19:17 -0600)
level_3/utf8/c/private-utf8.c		patch \| blob \| history
level_3/utf8/c/private-utf8.h		patch \| blob \| history
level_3/utf8/c/private-utf8_binary.c		patch \| blob \| history
level_3/utf8/c/private-utf8_codepoint.c		patch \| blob \| history