I had originally accidentally committed the utf8 program before it was ready.
I followed up with a cleanup after I noticed this.
It seems that there is still more work to finish.
Looking at what remains to be done, it has become clear to me that, while originally working on this, I realized I should move functionality into the level_0 f_utf project.
When I did this, I probably noticed a Unicode bug and stopped what I was doing to fix it.
I then forgot to come back and fix this code, leaving it in this incomplete and broken state.
I also noticed that the f_utf_unicode_string_from() function is mis-named.
This is a "to" function rather than a "from" function because it converts to a Unicode codepoint.
The "raw" print mode is now supported, so use fl_print_format() to print.
Move the printing of "append" to after the closing color context.
This makes more sense, but I have not checked whether the design logic is intended to be used this way.
}
#endif // _di_f_utf_unicode_from_
-#ifndef _di_f_utf_unicode_string_from_f_
- f_status_t f_utf_unicode_string_from(const f_string_t string, const f_array_length_t length, uint32_t *unicode) {
+#ifndef _di_f_utf_unicode_to_
+ f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode) {
+ #ifndef _di_level_0_parameter_checking_
+ if (width_max < 1) return F_status_set_error(F_parameter);
+ if (!unicode) return F_status_set_error(F_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ if (macro_f_utf_byte_width_is(*character) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+
+ {
+ f_utf_character_t character_utf = 0;
+
+ const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ if (F_status_is_error(status)) return status;
+
+ if (private_f_utf_character_is_valid(character_utf) == F_false) {
+ return F_status_set_error(F_utf);
+ }
+ }
+
+ // U+0000 -> U+007F.
+ if (macro_f_utf_byte_width(*character) == 1) {
+ *unicode = ((uint8_t) character[0]) & 0x7f;
+ }
+
+ // U+0080 -> U+07FF.
+ else if (macro_f_utf_byte_width(*character) == 2) {
+ *unicode = (((uint8_t) character[0]) & 0x1f) << 6;
+ *unicode |= ((uint8_t) character[1]) & 0x3f;
+ }
+
+ // U+0800 -> U+FFFF.
+ else if (macro_f_utf_byte_width(*character) == 3) {
+ *unicode = (((uint8_t) character[0]) & 0xf) << 12;
+ *unicode |= (((uint8_t) character[1]) & 0x3f) << 6;
+ *unicode |= ((uint8_t) character[2]) & 0x3f;
+ }
+
+ // U+10000 -> U+10FFFF.
+ else if (macro_f_utf_byte_width(*character) == 4) {
+ *unicode = (((uint8_t) character[0]) & 0x7) << 18;
+ *unicode |= (((uint8_t) character[1]) & 0x3f) << 12;
+ *unicode |= (((uint8_t) character[2]) & 0x3f) << 6;
+ *unicode |= ((uint8_t) character[3]) & 0x3f;
+ }
+
+ return F_none;
+ }
+#endif // _di_f_utf_unicode_to_
+
+#ifndef _di_f_utf_unicode_string_to_f_
+ f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode) {
#ifndef _di_level_0_parameter_checking_
if (!unicode) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
return F_none;
}
-#endif // _di_f_utf_unicode_string_from_
-
-#ifndef _di_f_utf_unicode_to_
- f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- if (!unicode) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- {
- f_utf_character_t character_utf = 0;
-
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
- if (F_status_is_error(status)) return status;
-
- if (private_f_utf_character_is_valid(character_utf) == F_false) {
- return F_status_set_error(F_utf);
- }
- }
-
- // U+0000 -> U+007F.
- if (macro_f_utf_byte_width(*character) == 1) {
- *unicode = ((uint8_t) character[0]) & 0x7f;
- }
-
- // U+0080 -> U+07FF.
- else if (macro_f_utf_byte_width(*character) == 2) {
- *unicode = (((uint8_t) character[0]) & 0x1f) << 6;
- *unicode |= ((uint8_t) character[1]) & 0x3f;
- }
-
- // U+0800 -> U+FFFF.
- else if (macro_f_utf_byte_width(*character) == 3) {
- *unicode = (((uint8_t) character[0]) & 0xf) << 12;
- *unicode |= (((uint8_t) character[1]) & 0x3f) << 6;
- *unicode |= ((uint8_t) character[2]) & 0x3f;
- }
-
- // U+10000 -> U+10FFFF.
- else if (macro_f_utf_byte_width(*character) == 4) {
- *unicode = (((uint8_t) character[0]) & 0x7) << 18;
- *unicode |= (((uint8_t) character[1]) & 0x3f) << 12;
- *unicode |= (((uint8_t) character[2]) & 0x3f) << 6;
- *unicode |= ((uint8_t) character[3]) & 0x3f;
- }
-
- return F_none;
- }
-#endif // _di_f_utf_unicode_to_
+#endif // _di_f_utf_unicode_string_to_
#ifdef __cplusplus
} // extern "C"
#endif // _di_f_utf_unicode_from_
/**
- * Convert a string of the format "U+FFFF" into the codepoint value.
- *
- * This ignores NULL characters.
- * The string may only contain "U+" followed by a hexidecimal digit, upper or lower case.
- * The "U+" prefix is optional.
- * Only ASCII characters are allowed to represent the Unicode sequence string.
- *
- * @param string
- * The string representing a Unicode sequence.
- * @param length
- * The maximum number of characters.
- * @param unicode
- * A 32-bit integer representing the Unicode (such as U+0001).
- * Does not need to be interpretted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_valid_not (with error bit) if string is not a valid Unicode string.
- */
-#ifndef _di_f_utf_unicode_string_from_f_
- extern f_status_t f_utf_unicode_string_from(const f_string_t string, const f_array_length_t length, uint32_t *unicode);
-#endif // _di_f_utf_unicode_string_from_
-
-/**
* Convert a given string block representing a single character into Unicode.
*
* @param character
extern f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode);
#endif // _di_f_utf_unicode_to_
+/**
+ * Convert a string of the format "U+FFFF" into the codepoint value.
+ *
+ * This ignores NULL characters.
+ * The string may only contain "U+" followed by a hexadecimal digit, upper or lower case.
+ * The "U+" prefix is optional.
+ * Only ASCII characters are allowed to represent the Unicode sequence string.
+ *
+ * @param string
+ * The string representing a Unicode sequence.
+ * @param length
+ * The maximum number of characters.
+ * @param unicode
+ * A 32-bit integer representing the Unicode (such as U+0001).
+ * Does not need to be interpreted like UTF-8; this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_failure (with error bit) if length is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_valid_not (with error bit) if string is not a valid Unicode string.
+ */
+#ifndef _di_f_utf_unicode_string_to_f_
+ extern f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode);
+#endif // _di_f_utf_unicode_string_to_
+
#ifdef __cplusplus
} // extern "C"
#endif
}
if (data->mode & utf8_mode_to_binary_d) {
- fl_print_format("%s%[", data->file.stream, data->prepend, set);
- f_print_dynamic_raw(character, data->file.stream);
- fl_print_format("%s%]", data->file.stream, data->append, set);
+ fl_print_format("%s%[%r%]%s", data->file.stream, data->prepend, set, character, set, data->append);
}
else {
fl_print_format("%s%[0x", data->file.stream, data->prepend, set);
fl_print_format("%04_uii", data->file.stream, (uint8_t) character.string[i]);
} // for
- fl_print_format("%s%]", data->file.stream, data->append, set);
+ fl_print_format("%]%s", data->file.stream, set, data->append);
}
}
#endif // _di_utf8_print_character_
if (data->main->error.verbosity == f_console_verbosity_quiet) return;
- fl_print_format("%c%[%SFailed to decode character '%]", data->main->error.to.stream, f_string_eol_s[0], data->main->context.set.error, data->main->context.set.error);
+ fl_print_format("%c%[%SFailed to decode character '%]", data->main->error.to.stream, f_string_eol_s[0], data->main->context.set.error, data->main->error.prefix, data->main->context.set.error);
fl_print_format("%[%r%]", data->main->error.to.stream, data->main->context.set.notable, character, data->main->context.set.notable);
fl_print_format("%[', error status code%] ", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s[0]);
- fl_print_format("%[%S%]", data->main->error.to.stream, data->main->context.set.notable, F_status_set_fine(status), data->main->context.set.notable);
+ fl_print_format("%[%ui%]", data->main->error.to.stream, data->main->context.set.notable, F_status_set_fine(status), data->main->context.set.notable);
fl_print_format("%[.%]%c", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s[0]);
}
#endif // _di_utf8_print_error_decode_
}
else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) {
if (data->mode & utf8_mode_to_binary_d) {
- f_print_terminated(data->prepend, data->file.stream);
- f_print_dynamic_raw(character, data->file.stream);
- f_print_terminated(data->append, data->file.stream);
+ fl_print_format("%s%r%s", data->file.stream, data->prepend, character, data->append);
}
else {
fl_print_format(codepoint < 0xffff ? "%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append);
if (*mode == utf8_codepoint_mode_end) {
uint32_t codepoint = 0;
- status = f_utf_unicode_string_from(data->text.string, data->text.used, &codepoint);
+ status = f_utf_unicode_string_to(data->text.string, data->text.used, &codepoint);
if (F_status_is_error(status)) {
if (F_status_set_fine(status) == F_failure || F_status_set_fine(status) == F_utf) {
text.used = macro_f_utf_byte_width(codepoint);
text.size = 5;
- byte[0] = macro_f_utf_character_t_to_char_1(codepoint);
+ status = f_utf_unicode_from(codepoint, 4, &text.string);
- if (text.used > 1) {
- byte[1] = macro_f_utf_character_t_to_char_2(codepoint);
-
- if (text.used > 2) {
- byte[2] = macro_f_utf_character_t_to_char_3(codepoint);
-
- if (text.used > 3) {
- byte[3] = macro_f_utf_character_t_to_char_4(codepoint);
- }
- }
+ if (F_status_is_error(status)) {
+ utf8_print_error_decode(data, status, character);
}
+ else {
+ status = F_none;
- f_print_terminated(data->prepend, data->file.stream);
- f_print_dynamic_raw(text, data->file.stream);
- f_print_terminated(data->append, data->file.stream);
+ fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append);
+ }
}
else {
fl_print_format(codepoint < 0xffff ? "%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append);