Kevux Git Server - fll/commitdiff
Bugfix: Do not print leading zeros in large Unicode codepoints.
author Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:36:39 +0000 (19:36 -0600)
committer Kevin Day <thekevinday@gmail.com>
Fri, 10 Dec 2021 01:41:06 +0000 (19:41 -0600)
Also clean up the code by moving some generic print functions into utf8-specific print functions.
Use "character" rather than "text" as the variable name to better communicate that the string is intended to represent a single Unicode character.

level_3/utf8/c/private-print.c
level_3/utf8/c/private-print.h
level_3/utf8/c/private-utf8_binary.c
level_3/utf8/c/private-utf8_codepoint.c

index 64d067e8368d8f19a84d2919658a146815c435f8..af811da5dd80434e1c49fcba1db8fc51fa422afe 100644 (file)
@@ -6,6 +6,13 @@
 extern "C" {
 #endif
 
+#ifndef _di_utf8_print_binary_
+  void utf8_print_binary(utf8_data_t * const data, const f_string_static_t character) {
+    // Write the character to data->file.stream, wrapped in data->prepend and data->append.
+    fl_print_format("%s%r%s", data->file.stream, data->prepend, character, data->append);
+  }
+#endif // _di_utf8_print_binary_
+
 #ifndef _di_utf8_print_character_
   void utf8_print_character(utf8_data_t * const data, const f_string_static_t character, const f_color_set_t set) {
 
@@ -26,6 +33,21 @@ extern "C" {
   }
 #endif // _di_utf8_print_character_
 
+#ifndef _di_utf8_print_codepoint_
+  void utf8_print_codepoint(utf8_data_t * const data, const uint32_t codepoint) {
+    // Pad to four digits through U+FFFF (inclusive), five below U+100000, and six otherwise.
+    if (codepoint < 0x10000) {
+      fl_print_format("%sU+%04_U%s", data->file.stream, data->prepend, codepoint, data->append);
+    }
+    else if (codepoint < 0x100000) {
+      fl_print_format("%sU+%05_U%s", data->file.stream, data->prepend, codepoint, data->append);
+    }
+    else {
+      fl_print_format("%sU+%06_U%s", data->file.stream, data->prepend, codepoint, data->append);
+    }
+  }
+#endif // _di_utf8_print_codepoint_
+
 #ifndef _di_utf8_print_error_decode_
   void utf8_print_error_decode(utf8_data_t * const data, const f_status_t status, const f_string_static_t character) {
 
index 3e528dbe39ac4f48c27352908c4757f2ad4aef74..8b0049ec42bd8916de6ff20d670f6e4c6db34286 100644 (file)
@@ -13,6 +13,19 @@ extern "C" {
 #endif
 
 /**
+ * Print the binary (encoded) form of a single character (such as '豸').
+ *
+ * @param data
+ *   The program data (provides the output stream and the prepend and append strings).
+ * @param character
+ *   The character to print.
+ *   This is a string that represents a single character.
+ */
+#ifndef _di_utf8_print_binary_
+  extern void utf8_print_binary(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_binary_
+
+/**
  * Print the character either as a Unicode codeblock or as a binary.
  *
  * @param data
@@ -27,6 +40,19 @@ extern "C" {
 #endif // _di_utf8_print_character_
 
 /**
+ * Print the codepoint number as a codepoint string, zero-padded to at least four digits (such as U+8C78).
+ *
+ * @param data
+ *   The program data (provides the output stream and the prepend and append strings).
+ * @param codepoint
+ *   The codepoint to print.
+ *   This is the numeric code that represents a single character.
+ */
+#ifndef _di_utf8_print_codepoint_
+  extern void utf8_print_codepoint(utf8_data_t * const data, const uint32_t codepoint) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_codepoint_
+
+/**
  * Print error message when attempt to decode the character failed.
  *
  * @param data
index 781cd007825fc62db8831a136086690deb6d19e0..aa7477243d88bacbf0226d8310d57bd04dc14749 100644 (file)
@@ -42,10 +42,10 @@ extern "C" {
     }
     else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) {
       if (data->mode & utf8_mode_to_binary_d) {
-        fl_print_format("%s%r%s", data->file.stream, data->prepend, character, data->append);
+        utf8_print_binary(data, character);
       }
       else {
-        fl_print_format(codepoint < 0xffff ? "%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append);
+        utf8_print_codepoint(data, codepoint);
       }
     }
 
index 3569b60c50359f1b7880a02624e28319521ad980..87e73bd9a2356fd5c70972a5adc77aa16ebe1af0 100644 (file)
@@ -54,22 +54,22 @@ extern "C" {
       else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) {
         if (data->mode & utf8_mode_to_binary_d) {
           char byte[5] = { 0, 0, 0, 0, 0 };
-          f_string_static_t text = macro_f_string_static_t_initialize(byte, 5);
+          f_string_static_t character = macro_f_string_static_t_initialize(byte, 5);
 
-          status = f_utf_unicode_from(codepoint, 4, &text.string);
+          status = f_utf_unicode_from(codepoint, 4, &character.string);
 
           if (F_status_is_error(status)) {
             utf8_print_error_decode(data, status, character);
           }
           else {
             status = F_none;
-            text.used = macro_f_utf_byte_width(text.string[0]);
+            character.used = macro_f_utf_byte_width(character.string[0]);
 
-            fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append);
+            utf8_print_binary(data, character);
           }
         }
         else {
-          fl_print_format(codepoint < 0xffff ? "%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append);
+          utf8_print_codepoint(data, codepoint);
         }
       }
     }