From 264951c9c080c274be347563b11a6f4619c463e0 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Thu, 9 Dec 2021 19:36:39 -0600 Subject: [PATCH] Bugfix: Do not print leading zeros in large Unicode codepoints. Also clean up the code moving some generic print functions into utf8-specific print functions. Use character rather than text to better communicate that the string is intended to represent a single Unicode character. --- level_3/utf8/c/private-print.c | 23 +++++++++++++++++++++++ level_3/utf8/c/private-print.h | 26 ++++++++++++++++++++++++++ level_3/utf8/c/private-utf8_binary.c | 4 ++-- level_3/utf8/c/private-utf8_codepoint.c | 10 +++++----- 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c index 64d067e..af811da 100644 --- a/level_3/utf8/c/private-print.c +++ b/level_3/utf8/c/private-print.c @@ -6,6 +6,13 @@ extern "C" { #endif +#ifndef _di_utf8_print_binary_ + void utf8_print_binary(utf8_data_t * const data, const f_string_static_t character) { + + fl_print_format("%s%r%s", data->file.stream, data->prepend, character, data->append); + } +#endif // _di_utf8_print_binary_ + #ifndef _di_utf8_print_character_ void utf8_print_character(utf8_data_t * const data, const f_string_static_t character, const f_color_set_t set) { @@ -26,6 +33,22 @@ extern "C" { } #endif // _di_utf8_print_character_ +#ifndef _di_utf8_print_codepoint_ + void utf8_print_codepoint(utf8_data_t * const data, const uint32_t codepoint) { + + // Codepoints through U+FFFF use the four digit form, larger ones use the narrowest width that fits so that no leading zero is printed. + if (codepoint < 0x10000) { + fl_print_format("%sU+%04_U%s", data->file.stream, data->prepend, codepoint, data->append); + } + else if (codepoint < 0x100000) { + fl_print_format("%sU+%05_U%s", data->file.stream, data->prepend, codepoint, data->append); + } + else { + fl_print_format("%sU+%06_U%s", data->file.stream, data->prepend, codepoint, data->append); + } + } +#endif // _di_utf8_print_codepoint_ + #ifndef _di_utf8_print_error_decode_ void utf8_print_error_decode(utf8_data_t * const 
data, const f_status_t status, const f_string_static_t character) { diff --git a/level_3/utf8/c/private-print.h b/level_3/utf8/c/private-print.h index 3e528db..8b0049e 100644 --- a/level_3/utf8/c/private-print.h +++ b/level_3/utf8/c/private-print.h @@ -13,6 +13,19 @@ extern "C" { #endif /** + * Print the binary character (such as '豸'). + * + * @param data + * The program data. + * @param character + * The character to print. + * This is a string that represents a single character. + */ +#ifndef _di_utf8_print_binary_ + extern void utf8_print_binary(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d; +#endif // _di_utf8_print_binary_ + +/** * Print the character either as a Unicode codeblock or as a binary. * * @param data @@ -27,6 +40,19 @@ extern "C" { #endif // _di_utf8_print_character_ /** + * Print the codepoint number as a codepoint string (such as U+8C78). + * + * @param data + * The program data. + * @param codepoint + * The codepoint to print. + * This is the code that represents a single character. + */ +#ifndef _di_utf8_print_codepoint_ + extern void utf8_print_codepoint(utf8_data_t * const data, const uint32_t codepoint) F_attribute_visibility_internal_d; +#endif // _di_utf8_print_codepoint_ + +/** * Print error message when attempt to decode the character failed. * * @param data diff --git a/level_3/utf8/c/private-utf8_binary.c b/level_3/utf8/c/private-utf8_binary.c index 781cd00..aa74772 100644 --- a/level_3/utf8/c/private-utf8_binary.c +++ b/level_3/utf8/c/private-utf8_binary.c @@ -42,10 +42,10 @@ extern "C" { } else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { if (data->mode & utf8_mode_to_binary_d) { - fl_print_format("%s%r%s", data->file.stream, data->prepend, character, data->append); + utf8_print_binary(data, character); } else { - fl_print_format(codepoint < 0xffff ? 
"%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append); + utf8_print_codepoint(data, codepoint); } } diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index 3569b60..87e73bd 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -54,22 +54,22 @@ extern "C" { else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { if (data->mode & utf8_mode_to_binary_d) { char byte[5] = { 0, 0, 0, 0, 0 }; - f_string_static_t text = macro_f_string_static_t_initialize(byte, 5); + f_string_static_t character = macro_f_string_static_t_initialize(byte, 5); - status = f_utf_unicode_from(codepoint, 4, &text.string); + status = f_utf_unicode_from(codepoint, 4, &character.string); if (F_status_is_error(status)) { utf8_print_error_decode(data, status, character); } else { status = F_none; - text.used = macro_f_utf_byte_width(text.string[0]); + character.used = macro_f_utf_byte_width(character.string[0]); - fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append); + utf8_print_binary(data, character); } } else { - fl_print_format(codepoint < 0xffff ? "%sU+%04_U%s" : "%sU+%6_U%s", data->file.stream, data->prepend, codepoint, data->append); + utf8_print_codepoint(data, codepoint); } } } -- 1.8.3.1