From: Kevin Day Date: Fri, 10 Dec 2021 01:17:43 +0000 (-0600) Subject: Bugfix: Codepoint to Binary is not working. X-Git-Tag: 0.5.7~59 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=d50c449769f3819d98b9c7966974b8aac26376f2;p=fll Bugfix: Codepoint to Binary is not working. The wrong variable is being processed. The codepoint (the Unicode representation, such as U+8C78, which is the codepoint for the character '豸') is being width checked. The binary character is what should be getting the width check. A second situation, where the codepoint is not printed at all, occurs when processing files. It seems that I forgot to finish writing this code (another problem caused by my original accidental commit of this project). While investigating this I saw some opportunities for cleanup. - Move the width detection into a separate function utf8_process_text_width(). - Use character.string[0] instead of *character.string. - Rename 'character' to 'current' to make more semantic sense (at the time I wasn't sure what to call it and 'text' was already unavailable). - The 'text' in private-utf8_codepoint.c is now initialized in a simpler way. 
--- diff --git a/level_3/utf8/c/private-utf8.c b/level_3/utf8/c/private-utf8.c index 70d99d6..ccee4d4 100644 --- a/level_3/utf8/c/private-utf8.c +++ b/level_3/utf8/c/private-utf8.c @@ -32,11 +32,13 @@ extern "C" { bool valid = F_true; uint8_t mode_codepoint = utf8_codepoint_mode_ready; - f_string_static_t character = macro_f_string_static_t_initialize(text, 4); + f_string_static_t current = macro_f_string_static_t_initialize(text, 0); + + utf8_process_text_width(&current); flockfile(data->file.stream); - for (uint16_t signal_check = 0; *character.string && F_status_is_error_not(status); ) { + for (uint16_t signal_check = 0; current.string[0] && F_status_is_error_not(status); ) { if (!((++signal_check) % utf8_signal_check_d)) { if (utf8_signal_received(data)) { @@ -48,53 +50,24 @@ extern "C" { } status = F_none; - character.used = macro_f_utf_byte_width(*character.string); - - // Re-adjust used if buffer ended before the character is supposed to end. - if (character.string[0]) { - if (character.used > 1) { - if (character.string[1]) { - if (character.used > 2) { - if (character.string[2]) { - if (character.used > 3) { - if (character.string[3]) { - character.used = 4; - } - else { - character.used = 3; - } - } - } - else { - character.used = 2; - } - } - } - else { - character.used = 1; - } - } - } - else { - character.used = 0; - } if (data->mode & utf8_mode_from_binary_d) { - status = utf8_convert_binary(data, character); + status = utf8_convert_binary(data, current); } else { - status = utf8_detect_codepoint(data, character, &mode_codepoint); + status = utf8_detect_codepoint(data, current, &mode_codepoint); if (F_status_is_fine(status) && status != F_next) { - status = utf8_convert_codepoint(data, character, &mode_codepoint); + status = utf8_convert_codepoint(data, current, &mode_codepoint); } } - character.string += character.used; - if (status == F_utf) { valid = F_false; } + + current.string += current.used; + utf8_process_text_width(&current); } // for if 
(F_status_is_error_not(status) && !(data->mode & utf8_mode_from_binary_d)) { @@ -107,9 +80,9 @@ extern "C" { valid = F_false; } - character.used = 0; + current.used = 0; - status = utf8_convert_codepoint(data, character, &mode_codepoint); + status = utf8_convert_codepoint(data, current, &mode_codepoint); } } @@ -133,6 +106,55 @@ extern "C" { } #endif // _di_utf8_process_text_ +#ifndef _di_utf8_process_text_width_ + void utf8_process_text_width(f_string_static_t *text) { + + if (!text->string[0]) { + return; + } + + text->used = 0; + text->size = macro_f_utf_byte_width(text->string[0]); + + if (text->size == 1) { + text->used = text->string[0] ? 1 : 0; + } + else if (text->used == 2) { + if (!text->string[0]) { + text->used = 1; + } + else { + text->used = text->string[1] ? 2 : 1; + } + } + else if (text->used == 3) { + if (!text->string[0]) { + text->used = 1; + } + else if (!text->string[1]) { + text->used = 2; + } + else { + text->used = text->string[2] ? 3 : 2; + } + } + else { + if (!text->string[0]) { + text->used = 1; + } + else if (!text->string[1]) { + text->used = 2; + } + else if (!text->string[2]) { + text->used = 3; + } + else { + text->used = text->string[3] ? 4 : 3; + } + } + } +#endif // _di_utf8_process_text_width_ + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_3/utf8/c/private-utf8.h b/level_3/utf8/c/private-utf8.h index 0490b3e..618abda 100644 --- a/level_3/utf8/c/private-utf8.h +++ b/level_3/utf8/c/private-utf8.h @@ -52,6 +52,19 @@ extern "C" { extern f_status_t utf8_process_text(utf8_data_t * const data, const f_string_t text) F_attribute_visibility_internal_d; #endif // _di_utf8_process_text_ +/** + * Populate the text used and size based on the string. + * + * @param text + * The character data. + * + * The used represents the actual length of the character in bytes. + * The size represents the expected length of the character in bytes. 
+ */ +#ifndef _di_utf8_process_text_width_ + extern void utf8_process_text_width(f_string_static_t *text) F_attribute_visibility_internal_d; +#endif // _di_utf8_process_text_width_ + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_3/utf8/c/private-utf8_binary.c b/level_3/utf8/c/private-utf8_binary.c index fc414ac..781cd00 100644 --- a/level_3/utf8/c/private-utf8_binary.c +++ b/level_3/utf8/c/private-utf8_binary.c @@ -129,7 +129,6 @@ extern "C" { // Handle last (incomplete) character when the buffer ended before the character is supposed to end. if (status != F_signal && next == F_false) { - character.used = j; if (data->mode & utf8_mode_from_binary_d) { diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index 6c8bd7c..3569b60 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -54,10 +54,7 @@ extern "C" { else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { if (data->mode & utf8_mode_to_binary_d) { char byte[5] = { 0, 0, 0, 0, 0 }; - f_string_static_t text = f_string_static_t_initialize; - text.string = byte; - text.used = macro_f_utf_byte_width(codepoint); - text.size = 5; + f_string_static_t text = macro_f_string_static_t_initialize(byte, 5); status = f_utf_unicode_from(codepoint, 4, &text.string); @@ -66,6 +63,7 @@ extern "C" { } else { status = F_none; + text.used = macro_f_utf_byte_width(text.string[0]); fl_print_format("%s%r%s", data->file.stream, data->prepend, text, data->append); } @@ -189,6 +187,7 @@ extern "C" { f_status_t status = F_none; bool valid = F_true; bool next = F_true; + uint8_t mode_codepoint = utf8_codepoint_mode_ready; uint16_t signal_check = 0; f_array_length_t i = 0; @@ -226,7 +225,16 @@ extern "C" { } // for if (j == character.used) { - status = utf8_convert_binary(data, character); + if (data->mode & utf8_mode_from_binary_d) { + status = utf8_convert_binary(data, character); + } + else { + 
status = utf8_detect_codepoint(data, character, &mode_codepoint); + + if (F_status_is_fine(status) && status != F_next) { + status = utf8_convert_codepoint(data, character, &mode_codepoint); + } + } if (status == F_utf) { valid = F_false; @@ -246,7 +254,16 @@ extern "C" { if (status != F_signal && next == F_false) { character.used = j; - status = utf8_convert_binary(data, character); + if (data->mode & utf8_mode_from_binary_d) { + status = utf8_convert_binary(data, character); + } + else { + status = utf8_detect_codepoint(data, character, &mode_codepoint); + + if (F_status_is_fine(status) && status != F_next) { + status = utf8_convert_codepoint(data, character, &mode_codepoint); + } + } if (status == F_utf) { valid = F_false;