From cd6fee673059a221b8cf504cd0f3b0cbd9b8e9ec Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Wed, 22 Jun 2022 00:20:54 -0500 Subject: [PATCH] Bugfix: A typo resulting in treating < 0xc3 as invalid UTF-8 when it is instead < 0xc2. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is for 2-width characters, such as: '²' (U+00B2) and '½' (U+00BD). These characters should not be treated as invalid. I have not yet investigated to see if I need to make other corrections. This is just an obvious mistake that I found and immediately fixed. --- level_0/f_utf/c/private-utf_valid.c | 4 ++-- level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c | 4 ++-- level_0/f_utf/tests/unit/c/test-utf-is_valid.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/level_0/f_utf/c/private-utf_valid.c b/level_0/f_utf/c/private-utf_valid.c index 8e5b786..de38afa 100644 --- a/level_0/f_utf/c/private-utf_valid.c +++ b/level_0/f_utf/c/private-utf_valid.c @@ -76,8 +76,8 @@ extern "C" { // Valid: 110xxxxx 10xxxxxx ???????? ????????. if ((macro_f_utf_char_t_to_char_1(sequence) & 0b11100000) == 0b11000000) { - // Only first byte ranges 0xc3 or greater are valid. - if (macro_f_utf_char_t_to_char_2(sequence) < 0xc3) { + // Only first byte ranges 0xc2 or greater are valid. + if (macro_f_utf_char_t_to_char_2(sequence) < 0xc2) { return F_false; } diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c index 3b448d3..207c890 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c @@ -123,8 +123,8 @@ void test__f_utf_character_is_valid__works(void **state) { // Valid: 110xxxxx 10xxxxxx ???????? ????????. if ((first & 0b11100000) == 0b11000000) { - // Only first byte ranges 0xc3 or greater are valid. - if (second < 0xc3) { + // Only first byte ranges 0xc2 or greater are valid. 
+ if (second < 0xc2) { assert_int_equal(status, F_false); continue; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_valid.c b/level_0/f_utf/tests/unit/c/test-utf-is_valid.c index 584c471..ec3a572 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_valid.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_valid.c @@ -100,8 +100,8 @@ void test__f_utf_is_valid__works(void **state) { // Valid: 110xxxxx 10xxxxxx ???????? ????????. if ((first & 0b11100000) == 0b11000000) { - // Only first byte ranges 0xc3 or greater are valid. - if (second < 0xc3) { + // Only first byte ranges 0xc2 or greater are valid. + if (second < 0xc2) { assert_int_equal(status, F_false); continue; -- 1.8.3.1