]> Kevux Git Server - fll/commitdiff
Feature: Add missing functionality allowing the utf8 program to convert back to binar...
authorKevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
committerKevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
Even when there are invalid codepoints produced, it should be possible to convert the entire output back to the original data.
This is possible because the codepoint output by default still prints the invalid data as a hex-digit representing up to 4 bytes of data.
The combining and width parameters are also supported.

level_3/utf8/c/common.c
level_3/utf8/c/common.h
level_3/utf8/c/private-common.h
level_3/utf8/c/private-print.c
level_3/utf8/c/private-print.h
level_3/utf8/c/private-utf8.c
level_3/utf8/c/private-utf8_bytecode.c
level_3/utf8/c/private-utf8_codepoint.c
level_3/utf8/c/private-utf8_codepoint.h

index 20e8b88fe580a036668e826d95241cf1a2414171..4f81c0f9306824090ddbe2e6db3b9daa432fb7f7 100644 (file)
@@ -35,6 +35,8 @@ extern "C" {
   const f_string_static_t utf8_string_width_0_s = macro_f_string_static_t_initialize(UTF8_string_width_0_s, 0, UTF8_string_width_0_s_length);
   const f_string_static_t utf8_string_width_1_s = macro_f_string_static_t_initialize(UTF8_string_width_1_s, 0, UTF8_string_width_1_s_length);
   const f_string_static_t utf8_string_width_2_s = macro_f_string_static_t_initialize(UTF8_string_width_2_s, 0, UTF8_string_width_2_s_length);
+  const f_string_static_t utf8_string_width_3_s = macro_f_string_static_t_initialize(UTF8_string_width_3_s, 0, UTF8_string_width_3_s_length);
+  const f_string_static_t utf8_string_width_4_s = macro_f_string_static_t_initialize(UTF8_string_width_4_s, 0, UTF8_string_width_4_s_length);
 #endif // _di_utf8_defines_
 
 #ifndef _di_utf8_parameters_
index ab64ec506874661967e0255554896f639844e748..1a477d002f55ade0245d6ec264b5c21f58798286 100644 (file)
@@ -99,6 +99,8 @@ extern "C" {
   #define UTF8_string_width_0_s "0"
   #define UTF8_string_width_1_s "1"
   #define UTF8_string_width_2_s "2"
+  #define UTF8_string_width_3_s "3"
+  #define UTF8_string_width_4_s "4"
 
   #define UTF8_string_combining_is_s_length  1
   #define UTF8_string_combining_not_s_length 1
@@ -117,6 +119,8 @@ extern "C" {
   #define UTF8_string_width_0_s_length 1
   #define UTF8_string_width_1_s_length 1
   #define UTF8_string_width_2_s_length 1
+  #define UTF8_string_width_3_s_length 1
+  #define UTF8_string_width_4_s_length 1
 
   extern const f_string_static_t utf8_string_combining_is_s;
   extern const f_string_static_t utf8_string_combining_not_s;
@@ -133,6 +137,8 @@ extern "C" {
   extern const f_string_static_t utf8_string_width_0_s;
   extern const f_string_static_t utf8_string_width_1_s;
   extern const f_string_static_t utf8_string_width_2_s;
+  extern const f_string_static_t utf8_string_width_3_s;
+  extern const f_string_static_t utf8_string_width_4_s;
 
   extern const f_string_static_t utf8_string_valid_not_s;
 #endif // _di_utf8_defines_
index c3f5d49676d654b05e79b354a607fb00bcaa36b1..1226c1bb70442c999faeb79565198047e4b2f8cf 100644 (file)
@@ -13,14 +13,22 @@ extern "C" {
 #endif
 
 /**
+ * Codepoint modes for converting to/from binary and codepoint values.
+ *
+ * The special "raw" format is used only when reading from the codepoint format, where it represents a binary character that is not a valid Unicode character.
+ * This is intended to be used to save and restore the original binary data even if that data is invalid.
+ *
  * private_utf8_codepoint_mode_*:
- *   - ready:     The codepoint has yet to be processed, skip leading spaces until first 'U' is matched.
- *   - begin:     The first 'U' is matched, look for the '+'.
- *   - number:    The '+' is matched, process numbers.
- *   - end:       The last number is reached (at either white space or EOS/EOF).
- *   - bad:       This is not a valid codepoint.
- *   - bad_begin: This is the beginning of an invalid codepoint.
- *   - bad_end:   The end of bad is detected, which happens on white space or end of buffer.
+ *   - ready:      The codepoint has yet to be processed, skip leading spaces until first 'U' is matched.
+ *   - begin:      The first 'U' is matched, look for the '+'.
+ *   - number:     The '+' is matched, process numbers.
+ *   - end:        The last number is reached (at either white space or EOS/EOF).
+ *   - bad:        This is not a valid codepoint.
+ *   - bad_begin:  This is the beginning of an invalid codepoint.
+ *   - bad_end:    The end of bad is detected, which happens on white space or end of buffer.
+ *   - raw_begin:  This is the potential beginning of raw data (matched '0').
+ *   - raw_number: This is the confirmed beginning of raw data (matched 'X'), process numbers.
+ *   - raw_end:    The end of raw data is detected, which happens on white space or end of buffer.
  */
 #ifndef _di_utf8_codepoint_modes_
   enum {
@@ -31,6 +39,9 @@ extern "C" {
     utf8_codepoint_mode_bad_e,
     utf8_codepoint_mode_bad_begin_e,
     utf8_codepoint_mode_bad_end_e,
+    utf8_codepoint_mode_raw_begin_e,
+    utf8_codepoint_mode_raw_number_e,
+    utf8_codepoint_mode_raw_end_e,
   };
 #endif // _di__utf8_codepoint_modes_
 
index cec0a5832e764ecc527b611f18728bf1e2e11c1b..7a6fcb1f56fcd3e80fe6f516c6718a93bd9bf289 100644 (file)
@@ -209,6 +209,113 @@ extern "C" {
   }
 #endif // _di_utf8_print_error_parameter_file_to_too_many_
 
+#ifndef _di_utf8_print_raw_bytecode_
+  void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) {
+
+    // Print nothing when invalid sequences are being stripped or when only verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    // At most 4 bytes are extracted from raw, so never print more than that.
+    // A fixed, zero-filled buffer avoids a zero-length array when width is 0 and uninitialized bytes when width exceeds 4.
+    const uint8_t total = width > 4 ? 4 : width;
+
+    uint8_t byte[4] = { 0, 0, 0, 0 };
+
+    f_string_static_t character = macro_f_string_static_t_initialize(0, 0, total);
+    character.string = byte;
+
+    // Split raw into individual bytes, a raw of 0 leaves every byte as 0.
+    // Each byte is shifted down into the low 8 bits before the cast, shifting up would make the cast always produce 0.
+    for (uint8_t i = 0; i < total; ++i) {
+      #ifdef _is_F_endian_big
+        byte[i] = (uint8_t) ((raw >> (8 * i)) & 0xff);
+      #else
+        byte[i] = (uint8_t) ((raw >> (8 * (total - 1 - i))) & 0xff);
+      #endif // _is_F_endian_big
+    } // for
+
+    fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, character, data->valid_not, data->append);
+  }
+#endif // _di_utf8_print_raw_bytecode_
+
+#ifndef _di_utf8_print_raw_codepoint_
+  void utf8_print_raw_codepoint(utf8_data_t * const data, const f_string_static_t raw) {
+
+    // Print nothing when invalid sequences are being stripped or when only verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e || data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) {
+      return;
+    }
+
+    // The raw text is printed as given, between data->prepend and data->append, with data->valid_not supplied for the "%[" and "%]" markers.
+    fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, raw, data->valid_not, data->append);
+  }
+#endif // _di_utf8_print_raw_codepoint_
+
+#ifndef _di_utf8_print_raw_combining_or_width_
+  void utf8_print_raw_combining_or_width(utf8_data_t * const data, const uint8_t width) {
+
+    // Print nothing when invalid sequences are being stripped or when only verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    if (data->mode & utf8_mode_to_combining_d) {
+
+      // In combining mode the unknown string is always printed for a raw character.
+      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+    }
+    else if (data->mode & utf8_mode_to_width_d) {
+      const f_string_static_t *character = 0;
+
+      // Select the string for the given width, any width outside of 1 to 4 is printed as 0.
+      switch (width) {
+        case 1:
+          character = &utf8_string_width_1_s;
+          break;
+
+        case 2:
+          character = &utf8_string_width_2_s;
+          break;
+
+        case 3:
+          character = &utf8_string_width_3_s;
+          break;
+
+        case 4:
+          character = &utf8_string_width_4_s;
+          break;
+
+        default:
+          character = &utf8_string_width_0_s;
+      }
+
+      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, *character, data->valid_not, data->append);
+    }
+  }
+#endif // _di_utf8_print_raw_combining_or_width_
+
 #ifndef _di_utf8_print_section_header_file_
   void utf8_print_section_header_file(utf8_data_t * const data, const f_string_static_t name) {
 
index df35aa07e685584c50f0697eaac110b76bd0cdbf..c6f794d8149b67c76d6d215ff4a3d070062ba697 100644 (file)
@@ -152,6 +152,46 @@ extern "C" {
 #endif // _di_utf8_print_error_parameter_file_to_too_many_
 
 /**
+ * Print the raw character data (binary / bytecode).
+ *
+ * Nothing is printed when either the strip invalid parameter or the verify parameter is found.
+ *
+ * @param data
+ *   The program data.
+ * @param raw
+ *   The raw data as an integer, which is split into individual bytes for printing.
+ * @param width
+ *   The width the raw character represents (a value inclusively from 1 to 4).
+ */
+#ifndef _di_utf8_print_raw_bytecode_
+  extern void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_bytecode_
+
+/**
+ * Print the raw character data (codepoint).
+ *
+ * Nothing is printed when either the strip invalid parameter or the verify parameter is found.
+ *
+ * @param data
+ *   The program data.
+ * @param raw
+ *   The raw string already in codepoint format, which is printed as given.
+ */
+#ifndef _di_utf8_print_raw_codepoint_
+  extern void utf8_print_raw_codepoint(utf8_data_t * const data, const f_string_static_t raw) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_codepoint_
+
+/**
+ * Print the width or combining state for a raw character.
+ *
+ * Nothing is printed when either the strip invalid parameter or the verify parameter is found.
+ *
+ * When converting to combining, the unknown string is printed.
+ * When converting to width, the string for the given width is printed.
+ *
+ * @param data
+ *   The program data.
+ * @param width
+ *   The pre-calculated width.
+ *   Any value other than 1, 2, 3, or 4 is printed as a width of 0.
+ *
+ * @see utf8_print_width()
+ */
+#ifndef _di_utf8_print_raw_combining_or_width_
+  extern void utf8_print_raw_combining_or_width(utf8_data_t * const data, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_combining_or_width_
+
+/**
  * Print the input file section header.
  *
  * @param data
index 3535e21f9ecd868fd1cf7e4597fd2daf2ee30781..e159450a849bf72f1f9223263b7d14a7794d2f74 100644 (file)
@@ -66,10 +66,13 @@ extern "C" {
     } // for
 
     if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_bytecode_d)) {
-      if (mode_codepoint != utf8_codepoint_mode_ready_e && mode_codepoint != utf8_codepoint_mode_end_e && mode_codepoint != utf8_codepoint_mode_bad_end_e) {
+      if (mode_codepoint != utf8_codepoint_mode_ready_e && mode_codepoint != utf8_codepoint_mode_end_e && mode_codepoint != utf8_codepoint_mode_bad_end_e && mode_codepoint != utf8_codepoint_mode_raw_end_e) {
         if (mode_codepoint == utf8_codepoint_mode_number_e) {
           mode_codepoint = utf8_codepoint_mode_end_e;
         }
+        else if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          mode_codepoint = utf8_codepoint_mode_raw_end_e;
+        }
         else {
           mode_codepoint = utf8_codepoint_mode_bad_end_e;
           valid = F_false;
@@ -77,7 +80,12 @@ extern "C" {
 
         text.used = 0;
 
-        status = utf8_convert_codepoint(data, text, &mode_codepoint);
+        if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          status = utf8_convert_raw(data, text, &mode_codepoint);
+        }
+        else {
+          status = utf8_convert_codepoint(data, text, &mode_codepoint);
+        }
       }
     }
 
index 419aef0d09b0c9fe2f61a989ea9cc8520f48ef40..6628d39c07f83c3f922a894aa6b6dd14a2637896 100644 (file)
@@ -73,7 +73,7 @@ extern "C" {
     f_array_length_t j = 0;
 
     f_char_t block_character[4] = { 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize2(block_character, 4);
+    f_string_static_t character = macro_f_string_static_t_initialize(block_character, 0, 4);
 
     do {
       status = f_file_read_block(file, &data->buffer);
index 689b718037fce5bd964ad9609010b2b027fe42e6..d0fdd6cf7a6d43136b34c2b8b9763333cfc18f3f 100644 (file)
@@ -26,10 +26,6 @@ extern "C" {
       } // for
     }
 
-    if (!(*mode == utf8_codepoint_mode_end_e || *mode == utf8_codepoint_mode_bad_end_e)) {
-      return F_none;
-    }
-
     if (*mode == utf8_codepoint_mode_end_e) {
       uint32_t codepoint = 0;
 
@@ -53,8 +49,8 @@ extern "C" {
       }
       else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
         if (data->mode & utf8_mode_to_bytecode_d) {
-          f_char_t byte[5] = { 0, 0, 0, 0, 0 };
-          f_string_static_t character = macro_f_string_static_t_initialize2(byte, 5);
+          f_char_t byte[4] = { 0, 0, 0, 0 };
+          f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4);
 
           status = f_utf_unicode_from(codepoint, 4, &character.string);
 
@@ -76,11 +72,14 @@ extern "C" {
         }
       }
     }
-    else {
+    else if (*mode == utf8_codepoint_mode_bad_end_e) {
       status = F_none;
 
       utf8_print_character_invalid(data, character);
     }
+    else {
+      return F_none;
+    }
 
     *mode = utf8_codepoint_mode_ready_e;
     data->text.used = 0;
@@ -93,6 +92,90 @@ extern "C" {
   }
 #endif // _di_utf8_convert_codepoint_
 
+#ifndef _di_utf8_convert_raw_
+  f_status_t utf8_convert_raw(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) {
+
+    f_status_t status = F_none;
+    bool valid_not = F_false;
+
+    // Until the end of the raw text is reached, accumulate the given character into data->text.
+    if (*mode != utf8_codepoint_mode_raw_end_e) {
+      if (data->text.used + character.used >= data->text.size) {
+        status = f_string_dynamic_increase_by(utf8_default_allocation_step_d, &data->text);
+        if (F_status_is_error(status)) return status;
+      }
+
+      for (f_array_length_t i = 0; i < character.used; ++i) {
+        data->text.string[data->text.used++] = character.string[i];
+      } // for
+    }
+
+    if (*mode == utf8_codepoint_mode_raw_end_e) {
+      f_utf_char_t raw = 0;
+
+      // The accumulated text (including the leading '0x') is converted into a number.
+      {
+        f_number_unsigned_t number = 0;
+
+        status = fl_conversion_dynamic_to_number_unsigned(data->text, &number);
+
+        raw = (f_utf_char_t) number;
+      }
+
+      if (F_status_is_error(status)) {
+        status = F_status_set_fine(status);
+
+        // These statuses represent invalid input rather than a failure, all other errors are returned to the caller.
+        if (status == F_number || status == F_utf_not || status == F_complete_not_utf || status == F_utf_fragment || status == F_number_decimal || status == F_number_negative || status == F_number_positive || status == F_number_overflow) {
+          valid_not = F_true;
+
+          utf8_print_character_invalid(data, character);
+        }
+        else {
+          status = F_status_set_error(status);
+
+          utf8_print_error_decode(data, status, character);
+
+          return status;
+        }
+      }
+      else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
+
+        // The text includes the leading '0x', which is not part of the width of the digit in binary form.
+        // The length is unsigned, so guard the subtraction to prevent wrapping around when fewer than 2 characters are stored.
+        const f_array_length_t digits = data->text.used > 2 ? data->text.used - 2 : 0;
+
+        // Two hex digits represent a single byte, an odd number of digits requires one additional byte.
+        const uint8_t width = (uint8_t) (digits / 2 + digits % 2);
+
+        if (data->mode & utf8_mode_to_bytecode_d) {
+          utf8_print_raw_bytecode(data, raw, width);
+        }
+        else if (data->mode & utf8_mode_to_codepoint_d) {
+          utf8_print_raw_codepoint(data, data->text);
+        }
+        else {
+          utf8_print_raw_combining_or_width(data, width);
+        }
+      }
+    }
+    else if (*mode == utf8_codepoint_mode_bad_end_e) {
+      status = F_none;
+
+      utf8_print_character_invalid(data, character);
+    }
+    else {
+
+      // The raw text is not yet complete, so wait for more characters.
+      return F_none;
+    }
+
+    // The sequence is fully processed, so reset for the next sequence.
+    *mode = utf8_codepoint_mode_ready_e;
+    data->text.used = 0;
+
+    if (valid_not || F_status_is_error(status)) {
+      return F_valid_not;
+    }
+
+    return status;
+  }
+#endif // _di_utf8_convert_raw_
+
 #ifndef _di_utf8_detect_codepoint_
   f_status_t utf8_detect_codepoint(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) {
 
@@ -106,6 +189,9 @@ extern "C" {
     if (character.string[0] == f_string_ascii_u_s.string[0] || character.string[0] == f_string_ascii_U_s.string[0] || character.string[0] == f_string_ascii_plus_s.string[0]) {
       // Do nothing.
     }
+    else if (character.string[0] == f_string_ascii_0_s.string[0] || character.string[0] == f_string_ascii_x_s.string[0] || character.string[0] == f_string_ascii_X_s.string[0]) {
+      // Do nothing.
+    }
     else if (character.string[0] == f_string_ascii_space_s.string[0]) {
       status = F_space;
     }
@@ -171,6 +257,10 @@ extern "C" {
           *mode = utf8_codepoint_mode_begin_e;
           data->text.used = 0;
         }
+        else if (character.string[0] == f_string_ascii_0_s.string[0]) {
+          *mode = utf8_codepoint_mode_raw_begin_e;
+          data->text.used = 0;
+        }
         else {
           *mode = utf8_codepoint_mode_bad_e;
         }
@@ -183,6 +273,19 @@ extern "C" {
           *mode = utf8_codepoint_mode_bad_e;
         }
       }
+      else if (*mode == utf8_codepoint_mode_raw_begin_e) {
+        if (character.string[0] == f_string_ascii_x_s.string[0] || character.string[0] == f_string_ascii_X_s.string[0]) {
+          *mode = utf8_codepoint_mode_raw_number_e;
+        }
+        else {
+          *mode = utf8_codepoint_mode_bad_e;
+        }
+      }
+      else if (*mode == utf8_codepoint_mode_raw_number_e) {
+        if (status == F_space) {
+          *mode = utf8_codepoint_mode_raw_end_e;
+        }
+      }
       else if (*mode == utf8_codepoint_mode_number_e) {
         if (status == F_space) {
           *mode = utf8_codepoint_mode_end_e;
@@ -205,8 +308,8 @@ extern "C" {
     f_array_length_t i = 0;
     f_array_length_t j = 0;
 
-    f_char_t block[4] = { 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize2(block, 4);
+    f_char_t block[5] = { 0, 0, 0, 0, 0 };
+    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 4);
 
     do {
       status = f_file_read_block(file, &data->buffer);
@@ -247,7 +350,15 @@ extern "C" {
             status = utf8_detect_codepoint(data, character, &mode_codepoint);
 
             if (F_status_is_fine(status) && status != F_next) {
-              status = utf8_convert_codepoint(data, character, &mode_codepoint);
+              if (mode_codepoint == utf8_codepoint_mode_raw_begin_e || mode_codepoint == utf8_codepoint_mode_raw_number_e || mode_codepoint == utf8_codepoint_mode_raw_end_e) {
+                status = utf8_convert_raw(data, character, &mode_codepoint);
+
+                // Raw mode represents an invalid Unicode sequence.
+                valid = F_false;
+              }
+              else {
+                status = utf8_convert_codepoint(data, character, &mode_codepoint);
+              }
             }
           }
 
@@ -276,7 +387,15 @@ extern "C" {
         status = utf8_detect_codepoint(data, character, &mode_codepoint);
 
         if (F_status_is_fine(status) && status != F_next) {
-          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          if (mode_codepoint == utf8_codepoint_mode_raw_begin_e || mode_codepoint == utf8_codepoint_mode_raw_number_e || mode_codepoint == utf8_codepoint_mode_raw_end_e) {
+            status = utf8_convert_raw(data, character, &mode_codepoint);
+
+            // Raw mode represents an invalid Unicode sequence.
+            valid = F_false;
+          }
+          else {
+            status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          }
         }
       }
 
index 184b9fb8c76102f68b133c705a96b219abb0508e..199895e22ee3f460166ba4e523e5e4eeecc3341f 100644 (file)
@@ -15,7 +15,7 @@ extern "C" {
 /**
  * Convert a codepoint character representation to another format.
  *
- * This automatically determines the output format and is also handles the verify process.
+ * This automatically determines the output format and also handles the verify process.
  *
  * @param data
  *   The program data.
@@ -28,15 +28,43 @@ extern "C" {
  *   F_none on success.
  *   F_utf_not on invalid UTF-8 (which is still "success" when verifying).
  *
- *   F_utf_not (with error bit) if not verifying and
+ *   F_utf_not (with error bit) if not verifying and the Unicode value is invalid.
  *
  *   Errors (with error bit) from: f_utf_unicode_to()
+ *
+ * @see f_utf_unicode_to()
  */
 #ifndef _di_utf8_convert_codepoint_
   extern f_status_t utf8_convert_codepoint(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) F_attribute_visibility_internal_d;
 #endif // _di_utf8_convert_codepoint_
 
 /**
+ * Convert a raw character representation (hex-digit) to another format.
+ *
+ * This automatically determines the output format and also handles the verify process.
+ *
+ * The characters are accumulated until the mode designates the end of the raw text, at which point the accumulated text is converted and printed.
+ *
+ * @param data
+ *   The program data.
+ * @param character
+ *   A single character currently being processed.
+ * @param mode
+ *   The codepoint mode the text is currently in.
+ *   This is reset to the ready mode once the raw text (or bad text) is fully processed.
+ *
+ * @return
+ *   F_none on success.
+ *   F_valid_not on invalid raw (which is still "success" when verifying).
+ *
+ *   F_valid_not (with error bit) if not verifying and the raw value is invalid.
+ *   NOTE(review): the implementation appears to return F_valid_not without the error bit in every invalid case, confirm which is intended.
+ *
+ *   Errors (with error bit) from: f_string_dynamic_increase_by()
+ *   Errors (with error bit) from: fl_conversion_dynamic_to_number_unsigned()
+ *
+ * @see f_string_dynamic_increase_by()
+ * @see fl_conversion_dynamic_to_number_unsigned()
+ */
+#ifndef _di_utf8_convert_raw_
+  extern f_status_t utf8_convert_raw(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) F_attribute_visibility_internal_d;
+#endif // _di_utf8_convert_raw_
+
+/**
  * Detect a codepoint character.
  *
  * @param data