Feature: Add missing functionality allowing the utf8 program to convert back to binar...

author Kevin Day <thekevinday@gmail.com>

Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)

committer Kevin Day <thekevinday@gmail.com>

Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
author Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
committer Kevin Day <thekevinday@gmail.com>
Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
diff --git a/level_3/utf8/c/common.c b/level_3/utf8/c/common.c

index 20e8b88fe580a036668e826d95241cf1a2414171..4f81c0f9306824090ddbe2e6db3b9daa432fb7f7 100644 (file)
--- a/level_3/utf8/c/common.c
+++ b/level_3/utf8/c/common.c
@@ -35,6 +35,8 @@ extern "C" {
    const f_string_static_t utf8_string_width_0_s = macro_f_string_static_t_initialize(UTF8_string_width_0_s, 0, UTF8_string_width_0_s_length);
    const f_string_static_t utf8_string_width_1_s = macro_f_string_static_t_initialize(UTF8_string_width_1_s, 0, UTF8_string_width_1_s_length);
    const f_string_static_t utf8_string_width_2_s = macro_f_string_static_t_initialize(UTF8_string_width_2_s, 0, UTF8_string_width_2_s_length);
+  const f_string_static_t utf8_string_width_3_s = macro_f_string_static_t_initialize(UTF8_string_width_3_s, 0, UTF8_string_width_3_s_length);
+  const f_string_static_t utf8_string_width_4_s = macro_f_string_static_t_initialize(UTF8_string_width_4_s, 0, UTF8_string_width_4_s_length);
  #endif // _di_utf8_defines_
  
  #ifndef _di_utf8_parameters_
diff --git a/level_3/utf8/c/common.h b/level_3/utf8/c/common.h

index ab64ec506874661967e0255554896f639844e748..1a477d002f55ade0245d6ec264b5c21f58798286 100644 (file)
--- a/level_3/utf8/c/common.h
+++ b/level_3/utf8/c/common.h
@@ -99,6 +99,8 @@ extern "C" {
    #define UTF8_string_width_0_s "0"
    #define UTF8_string_width_1_s "1"
    #define UTF8_string_width_2_s "2"
+  #define UTF8_string_width_3_s "3"
+  #define UTF8_string_width_4_s "4"
  
    #define UTF8_string_combining_is_s_length  1
    #define UTF8_string_combining_not_s_length 1
@@ -117,6 +119,8 @@ extern "C" {
    #define UTF8_string_width_0_s_length 1
    #define UTF8_string_width_1_s_length 1
    #define UTF8_string_width_2_s_length 1
+  #define UTF8_string_width_3_s_length 1
+  #define UTF8_string_width_4_s_length 1
  
    extern const f_string_static_t utf8_string_combining_is_s;
    extern const f_string_static_t utf8_string_combining_not_s;
@@ -133,6 +137,8 @@ extern "C" {
    extern const f_string_static_t utf8_string_width_0_s;
    extern const f_string_static_t utf8_string_width_1_s;
    extern const f_string_static_t utf8_string_width_2_s;
+  extern const f_string_static_t utf8_string_width_3_s;
+  extern const f_string_static_t utf8_string_width_4_s;
  
    extern const f_string_static_t utf8_string_valid_not_s;
  #endif // _di_utf8_defines_
diff --git a/level_3/utf8/c/private-common.h b/level_3/utf8/c/private-common.h

index c3f5d49676d654b05e79b354a607fb00bcaa36b1..1226c1bb70442c999faeb79565198047e4b2f8cf 100644 (file)
--- a/level_3/utf8/c/private-common.h
+++ b/level_3/utf8/c/private-common.h
@@ -13,14 +13,22 @@ extern "C" {
  #endif
  
  /**
+ * Codepoint modes for converting to/from binary and codepoint values.
+ *
+ * The special "raw" format is used only for reading from codepoint format where that format represents a binary character that is not a valid Unicode character.
+ * This is intended to be used to save and restore the original binary data even if that data is invalid.
+ *
   * private_utf8_codepoint_mode_*:
- *   - ready:     The codepoint has yet to be processed, skip leading spaces until first 'U' is matched.
- *   - begin:     The first 'U' is matched, look for the '+'.
- *   - number:    The '+' is matched, process numbers.
- *   - end:       The last number is reached (at either white space or EOS/EOF).
- *   - bad:       This is not a valid codepoint.
- *   - bad_begin: This is the beginning of an invalid codepoint.
- *   - bad_end:   The end of bad is detected, which happens on white space or end of buffer.
+ *   - ready:      The codepoint has yet to be processed, skip leading spaces until first 'U' is matched.
+ *   - begin:      The first 'U' is matched, look for the '+'.
+ *   - number:     The '+' is matched, process numbers.
+ *   - end:        The last number is reached (at either white space or EOS/EOF).
+ *   - bad:        This is not a valid codepoint.
+ *   - bad_begin:  This is the beginning of an invalid codepoint.
+ *   - bad_end:    The end of bad is detected, which happens on white space or end of buffer.
+ *   - raw_begin:  This is the beginning of potential raw data (matched '0').
+ *   - raw_number: This is the confirmed beginning of raw data (matched 'X'), process numbers.
+ *   - raw_end:    The end of raw data is detected, which happens on white space or end of buffer.
   */
  #ifndef _di_utf8_codepoint_modes_
    enum {
@@ -31,6 +39,9 @@ extern "C" {
      utf8_codepoint_mode_bad_e,
      utf8_codepoint_mode_bad_begin_e,
      utf8_codepoint_mode_bad_end_e,
+    utf8_codepoint_mode_raw_begin_e,
+    utf8_codepoint_mode_raw_number_e,
+    utf8_codepoint_mode_raw_end_e,
    };
  #endif // _di__utf8_codepoint_modes_
  
diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c

index cec0a5832e764ecc527b611f18728bf1e2e11c1b..7a6fcb1f56fcd3e80fe6f516c6718a93bd9bf289 100644 (file)
--- a/level_3/utf8/c/private-print.c
+++ b/level_3/utf8/c/private-print.c
@@ -209,6 +209,113 @@ extern "C" {
    }
  #endif // _di_utf8_print_error_parameter_file_to_too_many_
  
+#ifndef _di_utf8_print_raw_bytecode_
+  /**
+   * Print a raw value as binary (bytecode), wrapped by the invalid character markup.
+   *
+   * Raw values are not valid Unicode.
+   * Nothing is printed when either the strip invalid parameter or the verify parameter is found.
+   *
+   * The bytes are printed most significant first, which is the order the digits are written in the "0x..." text form.
+   * Each byte is extracted by value (mask and shift), so the result does not depend on the byte order of the host.
+   *
+   * @param data
+   *   The program data.
+   *   This provides the parameters, the output stream, and the strings printed around the bytes.
+   * @param raw
+   *   The raw string in integer format.
+   *   A value of 0 results in only NULL bytes being printed.
+   * @param width
+   *   The width the raw character represents (a value inclusively from 1 to 4).
+   *   A width of 0 prints no bytes and a width greater than 4 is treated as 4.
+   */
+  void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) {
+
+    // Raw data is invalid data, so it is not printed when stripping invalid sequences or when only verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    // A fixed size buffer is used because a variable length array must not have a length of 0.
+    f_char_t byte[4] = { 0, 0, 0, 0 };
+
+    // Never print more bytes than the buffer holds.
+    f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, width > 4 ? 4 : width);
+
+    // Masking and shifting operate on the value rather than on its memory layout, so no host byte order check is needed.
+    // Every byte must be shifted right into the lowest 8 bits, a left shift followed by the cast would always yield 0.
+    if (character.used == 1) {
+      byte[0] = (f_char_t) (raw & 0xff);
+    }
+    else if (character.used == 2) {
+      byte[0] = (f_char_t) ((raw & 0xff00) >> 8);
+      byte[1] = (f_char_t) (raw & 0xff);
+    }
+    else if (character.used == 3) {
+      byte[0] = (f_char_t) ((raw & 0xff0000) >> 16);
+      byte[1] = (f_char_t) ((raw & 0xff00) >> 8);
+      byte[2] = (f_char_t) (raw & 0xff);
+    }
+    else if (character.used == 4) {
+      byte[0] = (f_char_t) ((raw & 0xff000000) >> 24);
+      byte[1] = (f_char_t) ((raw & 0xff0000) >> 16);
+      byte[2] = (f_char_t) ((raw & 0xff00) >> 8);
+      byte[3] = (f_char_t) (raw & 0xff);
+    }
+
+    fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, character, data->valid_not, data->append);
+  }
+#endif // _di_utf8_print_raw_bytecode_
+
+#ifndef _di_utf8_print_raw_codepoint_
+  void utf8_print_raw_codepoint(utf8_data_t * const data, const f_string_static_t raw) {
+
+    // The raw text is only printed when neither stripping invalid sequences nor verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result != f_console_result_found_e && data->main->parameters.array[utf8_parameter_verify_e].result != f_console_result_found_e) {
+      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, raw, data->valid_not, data->append);
+    }
+  }
+#endif // _di_utf8_print_raw_codepoint_
+
+#ifndef _di_utf8_print_raw_combining_or_width_
+  void utf8_print_raw_combining_or_width(utf8_data_t * const data, const uint8_t width) {
+
+    // Raw data is invalid data, so it is not printed when stripping invalid sequences or when only verifying.
+    if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
+    if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
+
+    if (data->mode & utf8_mode_to_combining_d) {
+      // The combining state is always printed as the unknown string for raw data.
+      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append);
+    }
+    else if (data->mode & utf8_mode_to_width_d) {
+      const f_string_static_t *character = 0;
+
+      switch (width) {
+        case 1:
+          character = &utf8_string_width_1_s;
+          break;
+
+        case 2:
+          character = &utf8_string_width_2_s;
+          break;
+
+        case 3:
+          character = &utf8_string_width_3_s;
+          break;
+
+        case 4:
+          character = &utf8_string_width_4_s;
+          break;
+
+        default:
+          character = &utf8_string_width_0_s;
+      }
+
+      fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, *character, data->valid_not, data->append);
+    }
+  }
+#endif // _di_utf8_print_raw_combining_or_width_
+
  #ifndef _di_utf8_print_section_header_file_
    void utf8_print_section_header_file(utf8_data_t * const data, const f_string_static_t name) {
  
diff --git a/level_3/utf8/c/private-print.h b/level_3/utf8/c/private-print.h

index df35aa07e685584c50f0697eaac110b76bd0cdbf..c6f794d8149b67c76d6d215ff4a3d070062ba697 100644 (file)
--- a/level_3/utf8/c/private-print.h
+++ b/level_3/utf8/c/private-print.h
@@ -152,6 +152,46 @@ extern "C" {
  #endif // _di_utf8_print_error_parameter_file_to_too_many_
  
  /**
+ * Print the raw character data (binary / bytecode), unless the strip invalid or the verify parameter is found.
+ *
+ * @param data
+ *   The program data.
+ * @param raw
+ *   The raw string in integer format (a value of 0 results in only NULL bytes being printed).
+ * @param width
+ *   The width the raw character represents (a value inclusively from 1 to 4).
+ */
+#ifndef _di_utf8_print_raw_bytecode_
+  extern void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_bytecode_
+
+/**
+ * Print the raw character data (codepoint), unless the strip invalid or the verify parameter is found.
+ *
+ * @param data
+ *   The program data.
+ * @param raw
+ *   The raw string already in codepoint format (utf8_convert_raw() passes the "0x..." text as it was read).
+ */
+#ifndef _di_utf8_print_raw_codepoint_
+  extern void utf8_print_raw_codepoint(utf8_data_t * const data, const f_string_static_t raw) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_codepoint_
+
+/**
+ * Print the width or combining state for a raw character.
+ *
+ * @param data
+ *   The program data.
+ * @param width
+ *   The pre-calculated width (any value other than 1 to 4 is printed as a width of 0).
+ *
+ * @see utf8_print_width()
+ */
+#ifndef _di_utf8_print_raw_combining_or_width_
+  extern void utf8_print_raw_combining_or_width(utf8_data_t * const data, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_combining_or_width_
+
+/**
   * Print the input file section header.
   *
   * @param data
diff --git a/level_3/utf8/c/private-utf8.c b/level_3/utf8/c/private-utf8.c

index 3535e21f9ecd868fd1cf7e4597fd2daf2ee30781..e159450a849bf72f1f9223263b7d14a7794d2f74 100644 (file)
--- a/level_3/utf8/c/private-utf8.c
+++ b/level_3/utf8/c/private-utf8.c
@@ -66,10 +66,13 @@ extern "C" {
      } // for
  
      if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_bytecode_d)) {
-      if (mode_codepoint != utf8_codepoint_mode_ready_e && mode_codepoint != utf8_codepoint_mode_end_e && mode_codepoint != utf8_codepoint_mode_bad_end_e) {
+      if (mode_codepoint != utf8_codepoint_mode_ready_e && mode_codepoint != utf8_codepoint_mode_end_e && mode_codepoint != utf8_codepoint_mode_bad_end_e && mode_codepoint != utf8_codepoint_mode_raw_end_e) {
          if (mode_codepoint == utf8_codepoint_mode_number_e) {
            mode_codepoint = utf8_codepoint_mode_end_e;
          }
+        else if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          mode_codepoint = utf8_codepoint_mode_raw_end_e;
+        }
          else {
            mode_codepoint = utf8_codepoint_mode_bad_end_e;
            valid = F_false;
@@ -77,7 +80,12 @@ extern "C" {
  
          text.used = 0;
  
-        status = utf8_convert_codepoint(data, text, &mode_codepoint);
+        if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          status = utf8_convert_raw(data, text, &mode_codepoint);
+        }
+        else {
+          status = utf8_convert_codepoint(data, text, &mode_codepoint);
+        }
        }
      }
  
diff --git a/level_3/utf8/c/private-utf8_bytecode.c b/level_3/utf8/c/private-utf8_bytecode.c

index 419aef0d09b0c9fe2f61a989ea9cc8520f48ef40..6628d39c07f83c3f922a894aa6b6dd14a2637896 100644 (file)
--- a/level_3/utf8/c/private-utf8_bytecode.c
+++ b/level_3/utf8/c/private-utf8_bytecode.c
@@ -73,7 +73,7 @@ extern "C" {
      f_array_length_t j = 0;
  
      f_char_t block_character[4] = { 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize2(block_character, 4);
+    f_string_static_t character = macro_f_string_static_t_initialize(block_character, 0, 4);
  
      do {
        status = f_file_read_block(file, &data->buffer);
diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c

index 689b718037fce5bd964ad9609010b2b027fe42e6..d0fdd6cf7a6d43136b34c2b8b9763333cfc18f3f 100644 (file)
--- a/level_3/utf8/c/private-utf8_codepoint.c
+++ b/level_3/utf8/c/private-utf8_codepoint.c
@@ -26,10 +26,6 @@ extern "C" {
        } // for
      }
  
-    if (!(*mode == utf8_codepoint_mode_end_e || *mode == utf8_codepoint_mode_bad_end_e)) {
-      return F_none;
-    }
-
      if (*mode == utf8_codepoint_mode_end_e) {
        uint32_t codepoint = 0;
  
@@ -53,8 +49,8 @@ extern "C" {
        }
        else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
          if (data->mode & utf8_mode_to_bytecode_d) {
-          f_char_t byte[5] = { 0, 0, 0, 0, 0 };
-          f_string_static_t character = macro_f_string_static_t_initialize2(byte, 5);
+          f_char_t byte[4] = { 0, 0, 0, 0 };
+          f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4);
  
            status = f_utf_unicode_from(codepoint, 4, &character.string);
  
@@ -76,11 +72,14 @@ extern "C" {
          }
        }
      }
-    else {
+    else if (*mode == utf8_codepoint_mode_bad_end_e) {
        status = F_none;
  
        utf8_print_character_invalid(data, character);
      }
+    else {
+      return F_none;
+    }
  
      *mode = utf8_codepoint_mode_ready_e;
      data->text.used = 0;
@@ -93,6 +92,90 @@ extern "C" {
    }
  #endif // _di_utf8_convert_codepoint_
  
+#ifndef _di_utf8_convert_raw_
+  f_status_t utf8_convert_raw(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) {
+
+    f_status_t status = F_none;
+    bool valid_not = F_false;
+
+    // Until the end of the raw data is reached, the character is appended to the text being built ('0x' and digits).
+    if (*mode != utf8_codepoint_mode_raw_end_e) {
+      if (data->text.used + character.used >= data->text.size) {
+        status = f_string_dynamic_increase_by(utf8_default_allocation_step_d, &data->text);
+        if (F_status_is_error(status)) return status;
+      }
+
+      for (f_array_length_t i = 0; i < character.used; ++i) {
+        data->text.string[data->text.used++] = character.string[i];
+      } // for
+    }
+
+    if (*mode == utf8_codepoint_mode_raw_end_e) {
+      f_utf_char_t raw = 0;
+
+      // Convert the text built so far into its integer value.
+      {
+        f_number_unsigned_t number = 0;
+
+        status = fl_conversion_dynamic_to_number_unsigned(data->text, &number);
+        raw = (f_utf_char_t) number;
+      }
+
+      if (F_status_is_error(status)) {
+        status = F_status_set_fine(status);
+
+        // These statuses are treated as invalid input, any other error is reported and returned.
+        if (status == F_number || status == F_utf_not || status == F_complete_not_utf || status == F_utf_fragment || status == F_number_decimal || status == F_number_negative || status == F_number_positive || status == F_number_overflow) {
+          valid_not = F_true;
+
+          utf8_print_character_invalid(data, character);
+        }
+        else {
+          status = F_status_set_error(status);
+
+          utf8_print_error_decode(data, status, character);
+
+          return status;
+        }
+      }
+      else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
+        // The text includes the leading '0x', which is not part of the width of the digits in binary form.
+        // Every two digits form one byte (rounded up), guarding against underflow and saturating rather than wrapping.
+        const f_array_length_t digits = data->text.used > 2 ? data->text.used - 2 : 0;
+        const f_array_length_t bytes = (digits / 2) + (digits % 2);
+        const uint8_t width = bytes > UINT8_MAX ? UINT8_MAX : (uint8_t) bytes;
+
+        if (data->mode & utf8_mode_to_bytecode_d) {
+          utf8_print_raw_bytecode(data, raw, width);
+        }
+        else if (data->mode & utf8_mode_to_codepoint_d) {
+          utf8_print_raw_codepoint(data, data->text);
+        }
+        else {
+          utf8_print_raw_combining_or_width(data, width);
+        }
+      }
+    }
+    else if (*mode == utf8_codepoint_mode_bad_end_e) {
+      status = F_none;
+
+      utf8_print_character_invalid(data, character);
+    }
+    else {
+      return F_none;
+    }
+
+    *mode = utf8_codepoint_mode_ready_e;
+    data->text.used = 0;
+
+    if (valid_not || F_status_is_error(status)) {
+      return F_valid_not;
+    }
+
+    return status;
+  }
+#endif // _di_utf8_convert_raw_
+
  #ifndef _di_utf8_detect_codepoint_
    f_status_t utf8_detect_codepoint(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) {
  
@@ -106,6 +189,9 @@ extern "C" {
      if (character.string[0] == f_string_ascii_u_s.string[0] || character.string[0] == f_string_ascii_U_s.string[0] || character.string[0] == f_string_ascii_plus_s.string[0]) {
        // Do nothing.
      }
+    else if (character.string[0] == f_string_ascii_0_s.string[0] || character.string[0] == f_string_ascii_x_s.string[0] || character.string[0] == f_string_ascii_X_s.string[0]) {
+      // Do nothing.
+    }
      else if (character.string[0] == f_string_ascii_space_s.string[0]) {
        status = F_space;
      }
@@ -171,6 +257,10 @@ extern "C" {
            *mode = utf8_codepoint_mode_begin_e;
            data->text.used = 0;
          }
+        else if (character.string[0] == f_string_ascii_0_s.string[0]) {
+          *mode = utf8_codepoint_mode_raw_begin_e;
+          data->text.used = 0;
+        }
          else {
            *mode = utf8_codepoint_mode_bad_e;
          }
@@ -183,6 +273,19 @@ extern "C" {
            *mode = utf8_codepoint_mode_bad_e;
          }
        }
+      else if (*mode == utf8_codepoint_mode_raw_begin_e) {
+        if (character.string[0] == f_string_ascii_x_s.string[0] || character.string[0] == f_string_ascii_X_s.string[0]) {
+          *mode = utf8_codepoint_mode_raw_number_e;
+        }
+        else {
+          *mode = utf8_codepoint_mode_bad_e;
+        }
+      }
+      else if (*mode == utf8_codepoint_mode_raw_number_e) {
+        if (status == F_space) {
+          *mode = utf8_codepoint_mode_raw_end_e;
+        }
+      }
        else if (*mode == utf8_codepoint_mode_number_e) {
          if (status == F_space) {
            *mode = utf8_codepoint_mode_end_e;
@@ -205,8 +308,8 @@ extern "C" {
      f_array_length_t i = 0;
      f_array_length_t j = 0;
  
-    f_char_t block[4] = { 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize2(block, 4);
+    f_char_t block[5] = { 0, 0, 0, 0, 0 };
+    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 4);
  
      do {
        status = f_file_read_block(file, &data->buffer);
@@ -247,7 +350,15 @@ extern "C" {
              status = utf8_detect_codepoint(data, character, &mode_codepoint);
  
              if (F_status_is_fine(status) && status != F_next) {
-              status = utf8_convert_codepoint(data, character, &mode_codepoint);
+              if (mode_codepoint == utf8_codepoint_mode_raw_begin_e || mode_codepoint == utf8_codepoint_mode_raw_number_e || mode_codepoint == utf8_codepoint_mode_raw_end_e) {
+                status = utf8_convert_raw(data, character, &mode_codepoint);
+
+                // Raw mode represents an invalid Unicode sequence.
+                valid = F_false;
+              }
+              else {
+                status = utf8_convert_codepoint(data, character, &mode_codepoint);
+              }
              }
            }
  
@@ -276,7 +387,15 @@ extern "C" {
          status = utf8_detect_codepoint(data, character, &mode_codepoint);
  
          if (F_status_is_fine(status) && status != F_next) {
-          status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          if (mode_codepoint == utf8_codepoint_mode_raw_begin_e || mode_codepoint == utf8_codepoint_mode_raw_number_e || mode_codepoint == utf8_codepoint_mode_raw_end_e) {
+            status = utf8_convert_raw(data, character, &mode_codepoint);
+
+            // Raw mode represents an invalid Unicode sequence.
+            valid = F_false;
+          }
+          else {
+            status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          }
          }
        }
  
diff --git a/level_3/utf8/c/private-utf8_codepoint.h b/level_3/utf8/c/private-utf8_codepoint.h

index 184b9fb8c76102f68b133c705a96b219abb0508e..199895e22ee3f460166ba4e523e5e4eeecc3341f 100644 (file)
--- a/level_3/utf8/c/private-utf8_codepoint.h
+++ b/level_3/utf8/c/private-utf8_codepoint.h
@@ -15,7 +15,7 @@ extern "C" {
  /**
   * Convert a codepoint character representation to another format.
   *
- * This automatically determines the output format and is also handles the verify process.
+ * This automatically determines the output format and also handles the verify process.
   *
   * @param data
   *   The program data.
@@ -28,15 +28,43 @@ extern "C" {
   *   F_none on success.
   *   F_utf_not on invalid UTF-8 (which is still "success" when verifying).
   *
- *   F_utf_not (with error bit) if not verifying and
+ *   F_utf_not (with error bit) if not verifying and the Unicode value is invalid.
   *
   *   Errors (with error bit) from: f_utf_unicode_to()
+ *
+ * @see f_utf_unicode_to()
   */
  #ifndef _di_utf8_convert_codepoint_
    extern f_status_t utf8_convert_codepoint(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) F_attribute_visibility_internal_d;
  #endif // _di_utf8_convert_codepoint_
  
  /**
+ * Convert a raw character representation (hex-digit) to another format.
+ *
+ * This automatically determines the output format and also handles the verify process.
+ *
+ * @param data
+ *   The program data.
+ * @param character
+ *   The single character currently being processed.
+ * @param mode
+ *   The codepoint mode the text is currently in.
+ *
+ * @return
+ *   F_none on success.
+ *   F_valid_not on invalid raw (which is still "success" when verifying).
+ *
+ *   F_valid_not (with error bit) if not verifying and the raw value is invalid.
+ *
+ *   Errors (with error bit) from: f_string_dynamic_increase_by(), fl_conversion_dynamic_to_number_unsigned()
+ *
+ * @see f_string_dynamic_increase_by(), fl_conversion_dynamic_to_number_unsigned()
+ */
+#ifndef _di_utf8_convert_raw_
+  extern f_status_t utf8_convert_raw(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) F_attribute_visibility_internal_d;
+#endif // _di_utf8_convert_raw_
+
+/**
   * Detect a codepoint character.
   *
   * @param data
author	Kevin Day <thekevinday@gmail.com>
	Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Mon, 23 May 2022 01:45:31 +0000 (20:45 -0500)
level_3/utf8/c/common.c		patch \| blob \| history
level_3/utf8/c/common.h		patch \| blob \| history
level_3/utf8/c/private-common.h		patch \| blob \| history
level_3/utf8/c/private-print.c		patch \| blob \| history
level_3/utf8/c/private-print.h		patch \| blob \| history
level_3/utf8/c/private-utf8.c		patch \| blob \| history
level_3/utf8/c/private-utf8_bytecode.c		patch \| blob \| history
level_3/utf8/c/private-utf8_codepoint.c		patch \| blob \| history
level_3/utf8/c/private-utf8_codepoint.h		patch \| blob \| history