From 4cf6db552e2ad386523d7f5593d85b484b9fdc82 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Sat, 2 Apr 2022 09:03:20 -0500 Subject: [PATCH] Bugfix: Incorrectly checking byte width when already inside a character. The width has already been determined, so these values are parts of a UTF-8 character. The code is doing a byte check and attempting to verify or process this as a new UTF-8 character sequence. This results in inaccurate data from that point forward for the rest of the file. --- level_3/byte_dump/c/private-byte_dump.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/level_3/byte_dump/c/private-byte_dump.c b/level_3/byte_dump/c/private-byte_dump.c index 6980f0e..e768944 100644 --- a/level_3/byte_dump/c/private-byte_dump.c +++ b/level_3/byte_dump/c/private-byte_dump.c @@ -108,8 +108,6 @@ extern "C" { } // Process a UTF-8 character fragment. else if (width_count < width_utf) { - width_current = macro_f_utf_byte_width_is(byte); - if (width_count == 1) { characters.string[character_current] |= macro_f_utf_char_t_from_char_2(byte); } @@ -122,16 +120,8 @@ extern "C" { ++width_count; - // UTF-8 character fragments must have a width of 1 (and ASCII characters can only be the first character in a sequence). - if (width_current == 1) { - - // Grab the next UTF-8 character fragment if the entire sequence is not collected yet. - if (width_count < width_utf) continue; - } - else { - found_invalid_utf = F_true; - invalid[character_current] = width_utf; - } + // Grab the next UTF-8 character fragment if the entire sequence is not collected yet. + if (width_count < width_utf) continue; } // At this point: an ASCII character is collected, the entire UTF-8 character sequence is collected, or an invalid UTF-8 was processed. -- 1.8.3.1