Bugfix: Combining and Width detection for utf8 are not properly printing.

author Kevin Day <thekevinday@gmail.com>

Mon, 23 May 2022 02:39:27 +0000 (21:39 -0500)

committer Kevin Day <thekevinday@gmail.com>

Mon, 23 May 2022 02:41:35 +0000 (21:41 -0500)
author Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 02:39:27 +0000 (21:39 -0500)
committer Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 02:41:35 +0000 (21:41 -0500)
diff --git a/level_3/utf8/c/common.h b/level_3/utf8/c/common.h

index 2852a159c3e8e7d24c4eeb4f67925a2321b7188c..8d231fdf0cf25dfa53d91bf72df1ebe01cf73a37 100644 (file)
--- a/level_3/utf8/c/common.h
+++ b/level_3/utf8/c/common.h
@@ -297,22 +297,22 @@ extern "C" {
   * Modes used to designate how to the input and output are to be processed.
   *
   * utf8_mode_from_*:
- *   - bytesequence:  The input format is bytesequence.
- *   - codepoint: The input format is codepoint (U+XXXX or U+XXXXXX).
+ *   - bytesequence: The input format is bytesequence.
+ *   - codepoint:    The input format is codepoint (U+XXXX or U+XXXXXX).
   *
   * utf8_mode_to_*:
- *   - bytesequence:  The outout format is bytesequence.
- *   - codepoint: The outout format is codepoint (U+XXXX or U+XXXXXX).
- *   - combining: The outout format is whether or not character is combining (may be used with "width").
- *   - width:     The outout format is how wide the character is (may be used with "combining").
+ *   - bytesequence: The output format is bytesequence.
+ *   - codepoint:    The output format is codepoint (U+XXXX or U+XXXXXX).
+ *   - combining:    The output format is whether or not character is combining (may be used with "width").
+ *   - width:        The output format is how wide the character is (may be used with "combining").
   */
  #ifndef _di_utf8_modes_
-  #define utf8_mode_from_bytesequence_d  0x1
-  #define utf8_mode_from_codepoint_d 0x2
-  #define utf8_mode_to_bytesequence_d    0x4
-  #define utf8_mode_to_codepoint_d   0x8
-  #define utf8_mode_to_combining_d   0x10
-  #define utf8_mode_to_width_d       0x20
+  #define utf8_mode_from_bytesequence_d 0x1
+  #define utf8_mode_from_codepoint_d    0x2
+  #define utf8_mode_to_bytesequence_d   0x4
+  #define utf8_mode_to_codepoint_d      0x8
+  #define utf8_mode_to_combining_d      0x10
+  #define utf8_mode_to_width_d          0x20
  #endif // _di_utf8_modes_
  
  #ifdef __cplusplus
diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c

index 4f7a9aa84e0412df66acc22c64ddf0c84c74e942..ea02bfec4e302fb444445f82d30a0d5cdf213ea9 100644 (file)
--- a/level_3/utf8/c/private-print.c
+++ b/level_3/utf8/c/private-print.c
@@ -82,7 +82,7 @@ extern "C" {
          }
        }
        else if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-        fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+        utf8_print_error_combining_or_width(data);
        }
      }
      else if (data->mode & utf8_mode_to_width_d) {
@@ -91,6 +91,16 @@ extern "C" {
    }
  #endif // _di_utf8_print_combining_or_width_
  
+#ifndef _di_utf8_print_error_combining_or_width_
+  void utf8_print_error_combining_or_width(utf8_data_t * const data) {
+
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+  }
+#endif // _di_utf8_print_error_combining_or_width_
+
  #ifndef _di_utf8_print_error_decode_
    void utf8_print_error_decode(utf8_data_t * const data, const f_status_t status, const f_string_static_t character) {
  
@@ -285,7 +295,7 @@ extern "C" {
      f_status_t status = F_none;
  
      if (data->mode & utf8_mode_to_combining_d) {
-      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+      utf8_print_error_combining_or_width(data);
      }
      else if (data->mode & utf8_mode_to_width_d) {
        const f_string_static_t *character = 0;
@@ -407,9 +417,38 @@ extern "C" {
        }
      }
  
-    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+    utf8_print_error_combining_or_width(data);
+  }
+#endif // _di_utf8_print_width_
+
+#ifndef _di_utf8_print_width_codepoint_
+  void utf8_print_width_codepoint(utf8_data_t * const data, const f_string_static_t character) {
+
+    f_status_t status = f_utf_is_wide(character.string, character.used);
+
+    if (status == F_true) {
+      fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_2_s, data->append);
+
+      return;
      }
+
+    if (status == F_false) {
+      status = f_utf_is_graph(character.string, character.used);
+
+      if (status == F_true) {
+        fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_1_s, data->append);
+
+        return;
+      }
+
+      if (status == F_false) {
+        fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_0_s, data->append);
+
+        return;
+      }
+    }
+
+    utf8_print_error_combining_or_width(data);
    }
  #endif // _di_utf8_print_width_
  
diff --git a/level_3/utf8/c/private-print.h b/level_3/utf8/c/private-print.h

index 8b3b3c8270e337ae4db0f4164c24a9411d2bda09..8afd76b0e812bc479067e448b9507528b88c4c60 100644 (file)
--- a/level_3/utf8/c/private-print.h
+++ b/level_3/utf8/c/private-print.h
@@ -69,6 +69,16 @@ extern "C" {
  #endif // _di_utf8_print_combining_or_width_
  
  /**
+ * Print an error regarding the width or combining state of some character.
+ *
+ * @param data
+ *   The program data.
+ */
+#ifndef _di_utf8_print_error_combining_or_width_
+  extern void utf8_print_error_combining_or_width(utf8_data_t * const data) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_error_combining_or_width_
+
+/**
   * Print error message when attempt to decode the character failed.
   *
   * @param data
diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c

index 20b89a4c9ddb254a2a47c52ae4b2f10e8c787cc1..30cbe37a495e23b22b4e252ba40dad697a641eba 100644 (file)
--- a/level_3/utf8/c/private-utf8_codepoint.c
+++ b/level_3/utf8/c/private-utf8_codepoint.c
@@ -48,27 +48,34 @@ extern "C" {
          }
        }
        else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
-        if (data->mode & utf8_mode_to_bytesequence_d) {
+        if (data->mode & utf8_mode_to_codepoint_d) {
+          utf8_print_codepoint(data, codepoint);
+        }
+        else {
            f_char_t byte[4] = { 0, 0, 0, 0 };
            f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4);
  
            status = f_utf_unicode_from(codepoint, 4, &character.string);
  
            if (F_status_is_error(status)) {
-            utf8_print_error_encode(data, status, codepoint);
+            if (data->mode & utf8_mode_to_bytesequence_d) {
+              utf8_print_error_encode(data, status, codepoint);
+            }
+            else {
+              utf8_print_error_combining_or_width(data);
+            }
            }
-          else {
+          else if (data->mode & utf8_mode_to_bytesequence_d) {
              status = F_none;
              character.used = macro_f_utf_byte_width(character.string[0]);
  
              utf8_print_bytesequence(data, character);
            }
-        }
-        else if (data->mode & utf8_mode_to_codepoint_d) {
-          utf8_print_codepoint(data, codepoint);
-        }
-        else {
-          utf8_print_combining_or_width(data, character);
+          else {
+            status = F_none;
+
+            utf8_print_combining_or_width(data, character);
+          }
          }
        }
      }
author	Kevin Day <thekevinday@gmail.com>
	Mon, 23 May 2022 02:39:27 +0000 (21:39 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Mon, 23 May 2022 02:41:35 +0000 (21:41 -0500)
level_3/utf8/c/common.h		patch \| blob \| history
level_3/utf8/c/private-print.c		patch \| blob \| history
level_3/utf8/c/private-print.h		patch \| blob \| history
level_3/utf8/c/private-utf8_codepoint.c		patch \| blob \| history