From: Kevin Day Date: Mon, 23 May 2022 02:39:27 +0000 (-0500) Subject: Bugfix: Combining and Width detection for utf8 are not properly printing. X-Git-Tag: 0.5.10~103 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=9c57bf74f218b56ba37ade21b3e78fc493797c93;p=fll Bugfix: Combining and Width detection for utf8 are not properly printing. The wrong data is being passed to utf8_print_combining_or_width(). Change the behavior to send the correct string to the function. Move the error printing to a single function and use this function in all such cases. --- diff --git a/level_3/utf8/c/common.h b/level_3/utf8/c/common.h index 2852a15..8d231fd 100644 --- a/level_3/utf8/c/common.h +++ b/level_3/utf8/c/common.h @@ -297,22 +297,22 @@ extern "C" { * Modes used to designate how to the input and output are to be processed. * * utf8_mode_from_*: - * - bytesequence: The input format is bytesequence. - * - codepoint: The input format is codepoint (U+XXXX or U+XXXXXX). + * - bytesequence: The input format is bytesequence. + * - codepoint: The input format is codepoint (U+XXXX or U+XXXXXX). * * utf8_mode_to_*: - * - bytesequence: The outout format is bytesequence. - * - codepoint: The outout format is codepoint (U+XXXX or U+XXXXXX). - * - combining: The outout format is whether or not character is combining (may be used with "width"). - * - width: The outout format is how wide the character is (may be used with "combining"). + * - bytesequence: The outout format is bytesequence. + * - codepoint: The outout format is codepoint (U+XXXX or U+XXXXXX). + * - combining: The outout format is whether or not character is combining (may be used with "width"). + * - width: The outout format is how wide the character is (may be used with "combining"). */ #ifndef _di_utf8_modes_ - #define utf8_mode_from_bytesequence_d 0x1 - #define utf8_mode_from_codepoint_d 0x2 - #define utf8_mode_to_bytesequence_d 0x4 - #define utf8_mode_to_codepoint_d 0x8 - #define utf8_mode_to_combining_d 0x10 - #define utf8_mode_to_width_d 0x20 + #define utf8_mode_from_bytesequence_d 0x1 + #define utf8_mode_from_codepoint_d 0x2 + #define utf8_mode_to_bytesequence_d 0x4 + #define utf8_mode_to_codepoint_d 0x8 + #define utf8_mode_to_combining_d 0x10 + #define utf8_mode_to_width_d 0x20 #endif // _di_utf8_modes_ #ifdef __cplusplus diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c index 4f7a9aa..ea02bfe 100644 --- a/level_3/utf8/c/private-print.c +++ b/level_3/utf8/c/private-print.c @@ -82,7 +82,7 @@ extern "C" { } } else if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) { - fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append); + utf8_print_error_combining_or_width(data); } } else if (data->mode & utf8_mode_to_width_d) { @@ -91,6 +91,16 @@ extern "C" { } #endif // _di_utf8_print_combining_or_width_ +#ifndef _di_utf8_print_error_combining_or_width_ + void utf8_print_error_combining_or_width(utf8_data_t * const data) { + + if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return; + if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return; + + fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append); + } +#endif // _di_utf8_print_error_combining_or_width_ + #ifndef _di_utf8_print_error_decode_ void utf8_print_error_decode(utf8_data_t * const data, const f_status_t status, const f_string_static_t character) { @@ -285,7 +295,7 @@ extern "C" { f_status_t status = F_none; if (data->mode & utf8_mode_to_combining_d) { - fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append); + utf8_print_error_combining_or_width(data); } else if (data->mode & utf8_mode_to_width_d) { const f_string_static_t *character = 0; @@ -407,9 +417,38 @@ extern "C" { } } - if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_none_e && data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) { - fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, utf8_string_unknown_s, data->valid_not, data->append); + utf8_print_error_combining_or_width(data); + } +#endif // _di_utf8_print_width_ + +#ifndef _di_utf8_print_width_codepoint_ + void utf8_print_width_codepoint(utf8_data_t * const data, const f_string_static_t character) { + + f_status_t status = f_utf_is_wide(character.string, character.used); + + if (status == F_true) { + fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_2_s, data->append); + + return; } + + if (status == F_false) { + status = f_utf_is_graph(character.string, character.used); + + if (status == F_true) { + fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_1_s, data->append); + + return; + } + + if (status == F_false) { + fl_print_format("%r%r%r", data->file.stream, data->prepend, utf8_string_width_0_s, data->append); + + return; + } + } + + utf8_print_error_combining_or_width(data); } #endif // _di_utf8_print_width_ diff --git a/level_3/utf8/c/private-print.h b/level_3/utf8/c/private-print.h index 8b3b3c8..8afd76b 100644 --- a/level_3/utf8/c/private-print.h +++ b/level_3/utf8/c/private-print.h @@ -69,6 +69,16 @@ extern "C" { #endif // _di_utf8_print_combining_or_width_ /** + * Print an error regarding the width or combining state of a some character. + * + * @param data + * The program data. + */ +#ifndef _di_utf8_print_error_combining_or_width_ + extern void utf8_print_error_combining_or_width(utf8_data_t * const data) F_attribute_visibility_internal_d; +#endif // _di_utf8_print_error_combining_or_width_ + +/** * Print error message when attempt to decode the character failed. * * @param data diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index 20b89a4..30cbe37 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -48,27 +48,34 @@ extern "C" { } } else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) { - if (data->mode & utf8_mode_to_bytesequence_d) { + if (data->mode & utf8_mode_to_codepoint_d) { + utf8_print_codepoint(data, codepoint); + } + else { f_char_t byte[4] = { 0, 0, 0, 0 }; f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4); status = f_utf_unicode_from(codepoint, 4, &character.string); if (F_status_is_error(status)) { - utf8_print_error_encode(data, status, codepoint); + if (data->mode & utf8_mode_to_bytesequence_d) { + utf8_print_error_encode(data, status, codepoint); + } + else { + utf8_print_error_combining_or_width(data); + } } - else { + else if (data->mode & utf8_mode_to_bytesequence_d) { status = F_none; character.used = macro_f_utf_byte_width(character.string[0]); utf8_print_bytesequence(data, character); } - } - else if (data->mode & utf8_mode_to_codepoint_d) { - utf8_print_codepoint(data, codepoint); - } - else { - utf8_print_combining_or_width(data, character); + else { + status = F_none; + + utf8_print_combining_or_width(data, character); + } } } }