From c3706a7fb6527a0e51a60f95fc5b27f56de66f97 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Thu, 27 Oct 2022 19:53:39 -0500 Subject: [PATCH] Update: Support Unicode 15.0. I couldn't easily find what Unicode version I am trying to support so I added a unicode.txt document. The initial work was done for Unicode 12.1 and later switched to 14.0. I was still in the initial stages of learning Unicode and so I expect there to be notable incompleteness or incorrectness. With the release of Unicode 15.0 I am able to determine a distinct set of changes and I have applied the changes. Add a private_inline_f_utf_character_handle_digit_offset() inline function to simplify some of the redundant code. --- documents/unicode.txt | 10 + level_0/f_utf/c/private-utf_combining.c | 38 +- level_0/f_utf/c/private-utf_control.c | 8 +- level_0/f_utf/c/private-utf_digit.c | 606 ++++----------------- level_0/f_utf/c/private-utf_emoji.c | 42 +- level_0/f_utf/c/private-utf_numeric.c | 15 + level_0/f_utf/c/private-utf_punctuation.c | 5 + level_0/f_utf/c/private-utf_symbol.c | 13 +- level_0/f_utf/c/private-utf_wide.c | 9 +- level_0/f_utf/c/utf/private-is_unassigned.c | 171 +++++- .../data/tests/bytesequences/combining-all.txt | 26 + .../f_utf/data/tests/bytesequences/control-all.txt | 81 +++ .../f_utf/data/tests/bytesequences/digit-all.txt | 79 +-- .../f_utf/data/tests/bytesequences/emoji-all.txt | 19 + .../f_utf/data/tests/codepoints/combining-all.txt | 26 + .../f_utf/data/tests/codepoints/control-all.txt | 81 +++ level_0/f_utf/data/tests/codepoints/digit-all.txt | 79 +-- level_0/f_utf/data/tests/codepoints/emoji-all.txt | 17 + level_0/f_utf/data/tests/values/digit-all.txt | 79 +-- level_3/byte_dump/c/byte_dump.h | 2 +- level_3/utf8/c/utf8.h | 2 +- 21 files changed, 716 insertions(+), 692 deletions(-) create mode 100644 documents/unicode.txt diff --git a/documents/unicode.txt b/documents/unicode.txt new file mode 100644 index 0000000..6601aae --- /dev/null +++ b/documents/unicode.txt @@ -0,0
+1,10 @@ +# fss-0002 +# +# license: cc-by-sa-4.0 +# + +Unicode: + This project attempts to support Unicode 15.0. + + Given the scale of Unicode in conjunction with the knowledge and availability of the developers behind this project this goal is likely not met. + The intent is to reach compatibility, therefore fixes, improvements, and corrections will continue to be made as they are discovered and time is available. diff --git a/level_0/f_utf/c/private-utf_combining.c b/level_0/f_utf/c/private-utf_combining.c index 5c20bb3..30485cd 100644 --- a/level_0/f_utf/c/private-utf_combining.c +++ b/level_0/f_utf/c/private-utf_combining.c @@ -336,8 +336,8 @@ extern "C" { return F_true; } - // Kannada: U+0CE2, U+0CE3. - if (sequence == 0xe0b3a200 || sequence == 0xe0b3a300) { + // Kannada: U+0CE2, U+0CE3, U+0CF3. + if (sequence == 0xe0b3a200 || sequence == 0xe0b3a300 || sequence == 0xe0b3b300) { return F_true; } @@ -396,8 +396,8 @@ extern "C" { return F_true; } - // Lao: U+0EBB, U+0EBC. - if (sequence == 0xe0babb00 || sequence == 0xe0babc00) { + // Lao: U+0EBB, U+0EBC, U+0ECE. + if (sequence == 0xe0babb00 || sequence == 0xe0babc00 || sequence == 0xe0bb8e00) { return F_true; } @@ -930,6 +930,11 @@ extern "C" { return F_true; } + // Arabic Extended-C: U+10EFD to U+10EFF. + if (sequence >= 0xf090bbbd && sequence <= 0xf090bbbf) { + return F_true; + } + // Brahmi: U+11001. if (sequence == 0xf0918081) { return F_true; @@ -995,6 +1000,21 @@ extern "C" { return F_true; } + // Kawi: U+11F00 to U+11F03. + if (sequence >= 0xf091bc80 && sequence <= 0xf091bc83) { + return F_true; + } + + // Kawi: U+11F34 to U+11F3A. + if (sequence >= 0xf091bcb4 && sequence <= 0xf091bcba) { + return F_true; + } + + // Kawi: U+11F3E to U+11F42. + if (sequence >= 0xf091bcbe && sequence <= 0xf091bd82) { + return F_true; + } + // Khojki: U+1122F to U+11231.
if (sequence >= 0xf09188af && sequence <= 0xf09188b1) { return F_true; @@ -1170,6 +1190,16 @@ extern "C" { return F_true; } + // Cyrillic Extended-D: U+1E08F. + if (sequence == 0xf09e828f) { + return F_true; + } + + // Nag Mundari: U+1E4EC to U+1E4EF. + if (sequence >= 0xf09e93ac && sequence <= 0xf09e93af) { + return F_true; + } + // Mende Kikakui: U+1E8D0 to U+1E8D6. if (sequence >= 0xf09ea390 && sequence <= 0xf09ea396) { return F_true; diff --git a/level_0/f_utf/c/private-utf_control.c b/level_0/f_utf/c/private-utf_control.c index 8a3b232..aadc3eb 100644 --- a/level_0/f_utf/c/private-utf_control.c +++ b/level_0/f_utf/c/private-utf_control.c @@ -97,8 +97,8 @@ extern "C" { return F_true; } - // Egyptian Hieroglyphics: U+13430 to U+13438. - if (sequence >= 0xf09390b0 && sequence <= 0xf09390b8) { + // Egyptian Hieroglyph Format Controls: U+13430 to U+13455. + if (sequence >= 0xf09390b0 && sequence <= 0xf0939195) { return F_true; } @@ -219,8 +219,8 @@ extern "C" { return F_true; } - // Egyptian Hieroglyphics: U+13430 to U+13438. - if (sequence >= 0xf09390b0 && sequence <= 0xf09390b8) { + // Egyptian Hieroglyph Format Controls: U+13430 to U+13455. + if (sequence >= 0xf09390b0 && sequence <= 0xf0939195) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_digit.c b/level_0/f_utf/c/private-utf_digit.c index 035b83a..d08c0c9 100644 --- a/level_0/f_utf/c/private-utf_digit.c +++ b/level_0/f_utf/c/private-utf_digit.c @@ -49,6 +49,57 @@ static inline f_status_t private_inline_f_utf_character_handle_digit(const f_utf /** * Inline helper function to reduce amount of code typed. * + * Given the value, this will conditionally convert the range into an appropriate base-10 integer. + * + * This does not handle non-decimal values (non-base-10). + * + * This applies an offset with the intent of being used for producing values greater than 9 (such as 10 through 19). + * + * @param sequence + * The character sequence to process. + * @param start + * An inclusive start range. 
+ * The base-10 stop range calculated from this. + * @param offset + * An offset needed to add to the calculated base-10 value. + * If value is 9 and offset is 10 then a value of 19 is returned. + * @param value + * The value to update, if non-NULL. + * + * @return + * F_true for valid digit in the requested range. + * F_false, otherwise. + */ +static inline f_status_t private_inline_f_utf_character_handle_digit_offset(const f_utf_char_t sequence, const f_utf_char_t start, const uint8_t offset, uint64_t * const value) { + + if (value) { + f_char_t ascii = 0x30; + + if (macro_f_utf_char_t_width(sequence) == 2) { + ascii += (f_char_t) macro_f_utf_char_t_to_char_2(sequence - start); + } + else if (macro_f_utf_char_t_width(sequence) == 3) { + ascii += (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start); + } + else if (macro_f_utf_char_t_width(sequence) == 4) { + ascii += (f_char_t) macro_f_utf_char_t_to_char_4(sequence - start); + } + + if (private_f_utf_character_is_digit_for_ascii(ascii, value) == F_true) { + *value += offset; + + return F_true; + } + + return F_false; + } + + return F_true; +} + +/** + * Inline helper function to reduce amount of code typed. + * * Given the value, this will conditionally convert the range into an appropriate base-10 integer from 1 to 9. * * This does not handle non-decimal values (non-base-10). @@ -769,94 +820,9 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe291a000, value); } - // Enclosed Alphanumerics: U+2469. - if (sequence == 0xe291a900) { - if (value) { - *value = 10; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246A. - if (sequence == 0xe291aa00) { - if (value) { - *value = 11; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246B. - if (sequence == 0xe291ab00) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246C.
- if (sequence == 0xe291ac00) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246D. - if (sequence == 0xe291ad00) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246E. - if (sequence == 0xe291ae00) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+246F. - if (sequence == 0xe291af00) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2470. - if (sequence == 0xe291b000) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2471. - if (sequence == 0xe291b100) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2472. - if (sequence == 0xe291b200) { - if (value) { - *value = 19; - } - - return F_true; + // Enclosed Alphanumerics: U+2469 to U+2472. + if (sequence <= 0xe291b200) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe291a900, 10, value); } // Enclosed Alphanumerics: U+2473. @@ -873,91 +839,20 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe291b400, value); } - // Enclosed Alphanumerics: U+247D. - if (sequence == 0xe291bd00) { - if (value) { - *value = 10; - } - - return F_true; + // Enclosed Alphanumerics: U+247D to U+247F. + if (sequence <= 0xe291bf00) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe291bd00, 10, value); } - // Enclosed Alphanumerics: U+247E. - if (sequence == 0xe291be00) { - if (value) { - *value = 11; - } - - return F_true; + // Enclosed Alphanumerics: U+2480 to U+2486. + if (sequence <= 0xe2928600) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe2928000, 13, value); } - // Enclosed Alphanumerics: U+247F. 
- if (sequence == 0xe291bf00) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2480. - if (sequence == 0xe2928000) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2481. - if (sequence == 0xe2928100) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2482. - if (sequence == 0xe2928200) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2483. - if (sequence == 0xe2928300) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2484. - if (sequence == 0xe2928400) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2485. - if (sequence == 0xe2928500) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2486. - if (sequence == 0xe2928600) { + // Enclosed Alphanumerics: U+2487. + if (sequence == 0xe2928700) { if (value) { - *value = 19; + *value = 20; } return F_true; @@ -977,94 +872,9 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe2928800, value); } - // Enclosed Alphanumerics: U+2491. - if (sequence == 0xe2929100) { - if (value) { - *value = 10; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2492. - if (sequence == 0xe2929200) { - if (value) { - *value = 11; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2493. - if (sequence == 0xe2929300) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2494. - if (sequence == 0xe2929400) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2495. - if (sequence == 0xe2929500) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2496. 
- if (sequence == 0xe2929600) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2497. - if (sequence == 0xe2929700) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2498. - if (sequence == 0xe2929800) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+2499. - if (sequence == 0xe2929900) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+249A. - if (sequence == 0xe2929a00) { - if (value) { - *value = 19; - } - - return F_true; + // Enclosed Alphanumerics: U+2491 to U+249A. + if (sequence <= 0xe2929a00) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe2929100, 10, value); } // Enclosed Alphanumerics: U+249B. @@ -1087,85 +897,9 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return F_true; } - // Enclosed Alphanumerics: U+24EB. - if (sequence == 0xe293ab00) { - if (value) { - *value = 11; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24EC. - if (sequence == 0xe293ac00) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24ED. - if (sequence == 0xe293ad00) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24EE. - if (sequence == 0xe293ae00) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24EF. - if (sequence == 0xe293af00) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24F0. - if (sequence == 0xe293b000) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24F1. - if (sequence == 0xe293b100) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24F2. - if (sequence == 0xe293b200) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Enclosed Alphanumerics: U+24F3. 
- if (sequence == 0xe293b300) { - if (value) { - *value = 19; - } - - return F_true; + // Enclosed Alphanumerics: U+24EB to U+24F3. + if (sequence <= 0xe293b300) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe293aa00, 10, value); } // Enclosed Alphanumerics: U+24F4. @@ -4849,6 +4583,11 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return F_true; } + + // Kawi: U+11F50 to U+11F59. + if (sequence >= 0xf091bd90 && sequence <= 0xf091bd99) { + return private_inline_f_utf_character_handle_digit(sequence, 0xf091bd90, value); + } } else if (macro_f_utf_char_t_to_char_2(sequence) == 0x92) { @@ -5436,94 +5175,9 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return private_inline_f_utf_character_handle_digit(sequence, 0xf096ba80, value); } - // Medefaidrin: U+16E8A. - if (sequence == 0xf096ba8a) { - if (value) { - *value = 10; - } - - return F_true; - } - - // Medefaidrin: U+16E8B. - if (sequence == 0xf096ba8b) { - if (value) { - *value = 11; - } - - return F_true; - } - - // Medefaidrin: U+16E8C. - if (sequence == 0xf096ba8c) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Medefaidrin: U+16E8D. - if (sequence == 0xf096ba8d) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Medefaidrin: U+16E8E. - if (sequence == 0xf096ba8e) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Medefaidrin: U+16E8F. - if (sequence == 0xf096ba8f) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Medefaidrin: U+16E90. - if (sequence == 0xf096ba90) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Medefaidrin: U+16E91. - if (sequence == 0xf096ba91) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Medefaidrin: U+16E92. - if (sequence == 0xf096ba92) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Medefaidrin: U+16E93. 
- if (sequence == 0xf096ba93) { - if (value) { - *value = 19; - } - - return F_true; + // Medefaidrin: U+16E8A to U+16E93. + if (sequence <= 0xf096ba93) { + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf096ba8a, 10, value); } // Medefaidrin: U+16E94 to U+16E96. @@ -5532,6 +5186,18 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c } else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) { + // Kaktovik Numerals: U+1D2C0 to U+1D2D3. + if (sequence >= 0xf09d8b80 && sequence <= 0xf09d8b93) { + + // Kaktovik Numerals: U+1D2C0 to U+1D2C9. + if (sequence <= 0xf09d8b89) { + return private_inline_f_utf_character_handle_digit(sequence, 0xf09d8b80, value); + } + + // Kaktovik Numerals: U+1D2CA to U+1D2D3. + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf09d8b8a, 10, value); + } + // Mayan Numerals: U+1D2E0 to U+1D2F3. if (sequence >= 0xf09d8ba0 && sequence <= 0xf09d8bb3) { @@ -5540,93 +5206,8 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c return private_inline_f_utf_character_handle_digit(sequence, 0xf09d8ba0, value); } - // Mayan Numerals: U+1D2EA. - if (sequence == 0xf09d8baa) { - if (value) { - *value = 10; - } - - return F_true; - } - - // Mayan Numerals: U+1D2EB. - if (sequence == 0xf09d8bab) { - if (value) { - *value = 11; - } - - return F_true; - } - - // Mayan Numerals: U+1D2EC. - if (sequence == 0xf09d8bac) { - if (value) { - *value = 12; - } - - return F_true; - } - - // Mayan Numerals: U+1D2ED. - if (sequence == 0xf09d8bad) { - if (value) { - *value = 13; - } - - return F_true; - } - - // Mayan Numerals: U+1D2EE. - if (sequence == 0xf09d8bae) { - if (value) { - *value = 14; - } - - return F_true; - } - - // Mayan Numerals: U+1D2EF. - if (sequence == 0xf09d8baf) { - if (value) { - *value = 15; - } - - return F_true; - } - - // Mayan Numerals: U+1D2F0. 
- if (sequence == 0xf09d8bb0) { - if (value) { - *value = 16; - } - - return F_true; - } - - // Mayan Numerals: U+1D2F1. - if (sequence == 0xf09d8bb1) { - if (value) { - *value = 17; - } - - return F_true; - } - - // Mayan Numerals: U+1D2F2. - if (sequence == 0xf09d8bb2) { - if (value) { - *value = 18; - } - - return F_true; - } - - // Mayan Numerals: U+1D2F3. - if (value) { - *value = 19; - } - - return F_true; + // Mayan Numerals: U+1D2EA to U+1D2F3. + return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf09d8baa, 10, value); } // Counting Rod Numerals: U+1D360 to U+1D378. @@ -5807,6 +5388,11 @@ static inline f_status_t private_inline_f_utf_character_handle_digit_from_four(c } else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9e) { + // Nag Mundari: U+1E4F0 to U+1E4F9. + if (sequence >= 0xf09e93b0 && sequence <= 0xf09e93b9) { + return private_inline_f_utf_character_handle_digit(sequence, 0xf09e93b0, value); + } + // Mende Kikakui: U+1E8C7 to U+1E8CF. if (sequence >= 0xf09ea387 && sequence <= 0xf09ea38f) { return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xf09ea387, value); diff --git a/level_0/f_utf/c/private-utf_emoji.c b/level_0/f_utf/c/private-utf_emoji.c index 6c47d3e..0f683bf 100644 --- a/level_0/f_utf/c/private-utf_emoji.c +++ b/level_0/f_utf/c/private-utf_emoji.c @@ -428,8 +428,8 @@ extern "C" { return F_true; } - // Transport and Map Symbols: U+1F6DD to U+1F6DF. - if (sequence >= 0xf09f9b9d && sequence <= 0xf09f9b9f) { + // Transport and Map Symbols: U+1F6DC to U+1F6DF. + if (sequence >= 0xf09f9b9c && sequence <= 0xf09f9b9f) { return F_true; } @@ -473,48 +473,38 @@ extern "C" { return F_true; } - // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA74. - if (sequence >= 0xf09fa9b0 && sequence <= 0xf09fa9b4) { + // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA7C. + if (sequence >= 0xf09fa9b0 && sequence <= 0xf09fa9bc) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FA78 to U+1FA7C. 
- if (sequence >= 0xf09fa9b8 && sequence <= 0xf09fa9bc) { + // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA88. + if (sequence >= 0xf09faa80 && sequence <= 0xf09faa88) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA86. - if (sequence >= 0xf09faa80 && sequence <= 0xf09faa86) { + // Symbols and Pictographs Extended-A: U+1FA90 to U+1FABD. + if (sequence >= 0xf09faa90 && sequence <= 0xf09faabd) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FA90 to U+1FAAC. - if (sequence >= 0xf09faa90 && sequence <= 0xf09faaac) { + // Symbols and Pictographs Extended-A: U+1FABF to U+1FAC5. + if (sequence >= 0xf09faabf && sequence <= 0xf09fab85) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FAB0 to U+1FABA. - if (sequence >= 0xf09faab0 && sequence <= 0xf09faaba) { + // Symbols and Pictographs Extended-A: U+1FACE to U+1FADB. + if (sequence >= 0xf09fab8e && sequence <= 0xf09fab9b) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FAC0 to U+1FAC5. - if (sequence >= 0xf09fab80 && sequence <= 0xf09fab85) { + // Symbols and Pictographs Extended-A: U+1FAE0 to U+1FAE8. + if (sequence >= 0xf09faba0 && sequence <= 0xf09faba8) { return F_true; } - // Symbols and Pictographs Extended-A: U+1FAD0 to U+1FAD9. - if (sequence >= 0xf09fab90 && sequence <= 0xf09fab99) { - return F_true; - } - - // Symbols and Pictographs Extended-A: U+1FAE0 to U+1FAE7. - if (sequence >= 0xf09faba0 && sequence <= 0xf09faba7) { - return F_true; - } - - // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF6. - if (sequence >= 0xf09fabb0 && sequence <= 0xf09fabb6) { + // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF8. 
+ if (sequence >= 0xf09fabb0 && sequence <= 0xf09fabb8) { return F_true; } } diff --git a/level_0/f_utf/c/private-utf_numeric.c b/level_0/f_utf/c/private-utf_numeric.c index bc0121e..956fb6e 100644 --- a/level_0/f_utf/c/private-utf_numeric.c +++ b/level_0/f_utf/c/private-utf_numeric.c @@ -569,6 +569,11 @@ extern "C" { return F_true; } + // Kawi: U+11F50 to U+11F59. + if (sequence >= 0xf091bd90 && sequence <= 0xf091bd99) { + return F_true; + } + // Tamil Supplement: U+11FC0 to U+11FD4. if (sequence >= 0xf091bf80 && sequence <= 0xf091bf94) { return F_true; @@ -610,6 +615,11 @@ extern "C" { } else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) { + // Kaktovik Numerals: U+1D2C0 to U+1D2D3. + if (sequence >= 0xf09d8b80 && sequence <= 0xf09d8b93) { + return F_true; + } + // Mayan Numerals: U+1D2E0 to U+1D2F3. if (sequence >= 0xf09d8ba0 && sequence <= 0xf09d8bb3) { return F_true; @@ -657,6 +667,11 @@ extern "C" { return F_true; } + // Nag Mundari: U+1E4F0 to U+1E4F9. + if (sequence >= 0xf09e93b0 && sequence <= 0xf09e93b9) { + return F_true; + } + // Mende Kikakui: U+1E8C7 to U+1E8CF. if (sequence >= 0xf09ea387 && sequence <= 0xf09ea38f) { return F_true; diff --git a/level_0/f_utf/c/private-utf_punctuation.c b/level_0/f_utf/c/private-utf_punctuation.c index c089ea6..25a6f10 100644 --- a/level_0/f_utf/c/private-utf_punctuation.c +++ b/level_0/f_utf/c/private-utf_punctuation.c @@ -775,6 +775,11 @@ extern "C" { return F_true; } + // Kawi: U+11F43 to U+11F4F. + if (sequence >= 0xf091bd83 && sequence <= 0xf091bd8f) { + return F_true; + } + // Tamil Supplement: U+11FFF. if (sequence == 0xf091bfbf) { return F_true; diff --git a/level_0/f_utf/c/private-utf_symbol.c b/level_0/f_utf/c/private-utf_symbol.c index ff6d373..7e43f6f 100644 --- a/level_0/f_utf/c/private-utf_symbol.c +++ b/level_0/f_utf/c/private-utf_symbol.c @@ -925,13 +925,18 @@ extern "C" { return F_true; } - // Alchemical Symbols: U+1F6F0 to U+1F773. 
- if (sequence >= 0xf09f9c80 && sequence <= 0xf09f9db3) { + // Alchemical Symbols: U+1F6F0 to U+1F776. + if (sequence >= 0xf09f9c80 && sequence <= 0xf09f9db6) { return F_true; } - // Geometric Shapes Extended: U+1F780 to U+1F7D8. - if (sequence >= 0xf09f9e80 && sequence <= 0xf09f9f98) { + // Alchemical Symbols: U+1F77B to U+1F77F. + if (sequence >= 0xf09f9dbb && sequence <= 0xf09f9dbf) { + return F_true; + } + + // Geometric Shapes Extended: U+1F780 to U+1F7D9. + if (sequence >= 0xf09f9e80 && sequence <= 0xf09f9f99) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_wide.c b/level_0/f_utf/c/private-utf_wide.c index 3ecc9fc..51e2497 100644 --- a/level_0/f_utf/c/private-utf_wide.c +++ b/level_0/f_utf/c/private-utf_wide.c @@ -118,13 +118,8 @@ extern "C" { return F_false; } - // CJK Unified Ideographs Extension C .. CJK Unified Ideographs Extension E: U+2A700 to U+2CEA1. - if (sequence >= 0xf0aa9c80 && sequence <= 0xf0acbaa1) { - return F_true; - } - - // CJK Compatibility Ideographs Supplement: U+2F800 to U+2FA1D. - if (sequence >= 0xf0afa080 && sequence <= 0xf0afa89d) { + // CJK Unified Ideographs Extension B .. CJK Unified Ideographs Extension H: U+20000 to U+323AF. + if (sequence >= 0xf0a08080 && sequence <= 0xf0b28eaf) { return F_true; } diff --git a/level_0/f_utf/c/utf/private-is_unassigned.c b/level_0/f_utf/c/utf/private-is_unassigned.c index 0d4492f..6b76f72 100644 --- a/level_0/f_utf/c/utf/private-is_unassigned.c +++ b/level_0/f_utf/c/utf/private-is_unassigned.c @@ -425,8 +425,8 @@ extern "C" { return F_true; } - // Kannada: U+0CF3 to U+0CFF. - if (character >= 0xe0b3b300 && character <= 0xe0b3bf00) { + // Kannada: U+0CF4 to U+0CFF. + if (character >= 0xe0b3b400 && character <= 0xe0b3bf00) { return F_true; } @@ -550,8 +550,8 @@ extern "C" { return F_true; } - // Lao: U+0ECE, U+0ECF, U+0EDA, U+0EDB. - if (character == 0xe0bb8e00 || character == 0xe0bb8f00 || character == 0xe0bb9a00 || character == 0xe0bb9b00) { + // Lao: U+0ECF, U+0EDA, U+0EDB. 
+ if (character == 0xe0bb8f00 || character == 0xe0bb9a00 || character == 0xe0bb9b00) { return F_true; } @@ -1623,6 +1623,11 @@ extern "C" { return F_true; } + // Arabic Extended-C: U+10EC0 to U+10EFC. + if (character >= 0xf090bb80 && character <= 0xf090bbbc) { + return F_true; + } + // Brahmi: U+1104E to U+11051. if (character >= 0xf091818e && character <= 0xf0918191) { return F_true; @@ -1648,6 +1653,26 @@ extern "C" { return F_true; } + // Devanagari Extended A: U+11B0A to U+11B5F. + if (character >= 0xf091ac8a && character <= 0xf091ad9f) { + return F_true; + } + + // Kawi: U+11F11. + if (character == 0xf091bc91) { + return F_true; + } + + // Kawi: U+11F3B to U+11F3D. + if (character >= 0xf091bcbb && character <= 0xf091bcbd) { + return F_true; + } + + // Kawi: U+11F5A to U+11F5F. + if (character >= 0xf091bd9a && character <= 0xf091bd9f) { + return F_true; + } + // Supplemental Symbols and Pictographs: U+1F9C1 to U+1FFFD. if (character >= 0xf09fa781 && character <= 0xf09fbfbd) { return F_true; @@ -1688,8 +1713,8 @@ extern "C" { return F_true; } - // Khojki: U+1123E to U+1124F. - if (character >= 0xf09188be && character <= 0xf091898f) { + // Khojki: U+11242 to U+1124F. + if (character >= 0xf0918982 && character <= 0xf091898f) { return F_true; } @@ -1853,8 +1878,8 @@ extern "C" { return F_true; } - // Egyptian Hieroglyphs: U+1342F. - if (character == 0xf09390af) { + // Egyptian Hieroglyph Format Controls: U+13456 to U+1345F. + if (character >= 0xf0939196 && character <= 0xf093919f) { return F_true; } @@ -1918,6 +1943,31 @@ extern "C" { return F_true; } + // Small Kana Extension: U+1B130, U+1B131. + if (character == 0xf09b84b0 || character == 0xf09b84b1) { + return F_true; + } + + // Small Kana Extension: U+1B133 to U+1B14F. + if (character >= 0xf09b84b3 && character <= 0xf09b858f) { + return F_true; + } + + // Small Kana Extension: U+1B153, U+1B154. 
+ if (character == 0xf09b8593 || character == 0xf09b8594) { + return F_true; + } + + // Small Kana Extension: U+1B156 to U+1B163. + if (character >= 0xf09b8596 && character <= 0xf09b85a3) { + return F_true; + } + + // Small Kana Extension: U+1B168 to U+1B16F. + if (character >= 0xf09b85a8 && character <= 0xf09b85af) { + return F_true; + } + // Duployan: U+1BC6B to U+1BC6F. if (character >= 0xf09bb1ab && character <= 0xf09bb1af) { return F_true; @@ -1968,6 +2018,11 @@ extern "C" { return F_true; } + // Kaktovik Numerals: U+1D2D4 to U+1D2DF. + if (character >= 0xf09d8b94 && character <= 0xf09d8b9f) { + return F_true; + } + // Counting Rod Numerals: U+1D372 to U+1D37F. if (character >= 0xf09d8db2 && character <= 0xf09d8dbf) { return F_true; @@ -2018,6 +2073,26 @@ extern "C" { return F_true; } + // Latin Extended-G: U+1DF1F to U+1DF24. + if (character >= 0xf09dbc9f && character <= 0xf09dbca4) { + return F_true; + } + + // Latin Extended-G: U+1DF2B to U+1DFFF. + if (character >= 0xf09dbcab && character <= 0xf09dbfbf) { + return F_true; + } + + // Cyrillic Extended-D: U+1E06E to U+1E08E. + if (character >= 0xf09e81ae && character <= 0xf09e828e) { + return F_true; + } + + // Nag Mundari: U+1E4FA to U+1E4FF. + if (character >= 0xf09e93ba && character <= 0xf09e93bf) { + return F_true; + } + // Mende Kikakui: U+1E8C5 to U+1E8C6. if (character >= 0xf09ea385 && character <= 0xf09ea386) { return F_true; @@ -2168,8 +2243,8 @@ extern "C" { return F_true; } - // Transport and Map Symbols: U+1F6D1 to U+1F6DF. - if (character >= 0xf09f9b91 && character <= 0xf09f9b9f) { + // Transport and Map Symbols: U+1F6D8 to U+1F6DB. + if (character >= 0xf09f9b98 && character <= 0xf09f9b9b) { return F_true; } @@ -2178,18 +2253,28 @@ extern "C" { return F_true; } - // Transport and Map Symbols: U+1F6F4 to U+1F6FF. - if (character >= 0xf09f9bb4 && character <= 0xf09f9bbf) { + // Transport and Map Symbols: U+1F6FD to U+1F6FF. 
+ if (character >= 0xf09f9bbd && character <= 0xf09f9bbf) { + return F_true; + } + + // Alchemical Symbols: U+1F777 to U+1F77A. + if (character >= 0xf09f9db7 && character <= 0xf09f9dba) { + return F_true; + } + + // Geometric Shapes Extended: U+1F7DA to U+1F7DF. + if (character >= 0xf09f9f9a && character <= 0xf09f9f9f) { return F_true; } - // Alchemical Symbols: U+1F774 to U+1F77F. - if (character >= 0xf09f9db4 && character <= 0xf09f9dbf) { + // Geometric Shapes Extended: U+1F7EC to U+1F7EF. + if (character >= 0xf09f9fac && character <= 0xf09f9faf) { return F_true; } - // Geometric Shapes Extended: U+1F7D5 to U+1F7FF. - if (character >= 0xf09f9f95 && character <= 0xf09f9fbf) { + // Geometric Shapes Extended: U+1F7F1 to U+1F7FF. + if (character >= 0xf09f9fb1 && character <= 0xf09f9fbf) { return F_true; } @@ -2238,6 +2323,41 @@ extern "C" { return F_true; } + // Symbols and Pictographs Extended-A: U+1FA7D to U+1FA7F. + if (character >= 0xf09fa9bd && character <= 0xf09fa9bf) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FA89 to U+1FA8F. + if (character >= 0xf09faa89 && character <= 0xf09faa8f) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FABE. + if (character == 0xf09faabe) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAC6 to U+1FACD. + if (character >= 0xf09fab86 && character <= 0xf09fab8d) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FADC to U+1FADF. + if (character >= 0xf09fab9c && character <= 0xf09fab9f) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAE9 to U+1FAEF. + if (character >= 0xf09faba9 && character <= 0xf09fabaf) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAF9 to U+1FAFF. + if (character >= 0xf09fabb9 && character <= 0xf09fabbf) { + return F_true; + } + // CJK Unified Ideographs Extension B: U+2A6D7 to U+2A6FF. 
if (character >= 0xf0aa9b97 && character <= 0xf0aa9bbf) { return F_true; @@ -2258,13 +2378,28 @@ extern "C" { return F_true; } + // CJK Unified Ideographs Extension F: U+2CEB2 to U+2CEBF. + if (character >= 0xf0acbab2 && character <= 0xf0acbabf) { + return F_true; + } + // CJK Compatibility Ideographs Supplement: U+2FA1E to U+2FFFD. if (character >= 0xf0b08080 && character <= 0xf0afbfbd) { return F_true; } - // Tertiary Ideographic Plane: U+30000 to U+3FFFD. - if (character >= 0xf0b08080 && character <= 0xf0bfbfbd) { + // CJK Unified Ideographs Extension G: U+3134B to U+3134F. + if (character >= 0xf0b18d8b && character <= 0xf0b18d8f) { + return F_true; + } + + // Tertiary Ideographic Plane: U+30000 to U+3134F. + if (character >= 0xf0b08080 && character <= 0xf0b18d8f) { + return F_true; + } + + // Tertiary Ideographic Plane: U+323AF to U+3FFFD. + if (character >= 0xf0b28eaf && character <= 0xf0bfbfbd) { return F_true; } diff --git a/level_0/f_utf/data/tests/bytesequences/combining-all.txt b/level_0/f_utf/data/tests/bytesequences/combining-all.txt index 085bbe3..a8926d5 100644 --- a/level_0/f_utf/data/tests/bytesequences/combining-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/combining-all.txt @@ -500,6 +500,7 @@ 14726029 14726050 14726051 +14726067 14726273 14726529 14726530 @@ -538,6 +539,7 @@ 14727865 14727867 14727868 +14728078 14728344 14728345 14728373 @@ -656,6 +658,9 @@ 4036012223 4036012965 4036012966 +4036017085 +4036017086 +4036017087 4036067457 4036067512 4036067513 @@ -712,6 +717,27 @@ 4036069258 4036069259 4036069260 +4036082816 +4036082817 +4036082818 +4036082819 +4036082868 +4036082869 +4036082870 +4036082871 +4036082872 +4036082873 +4036082874 +4036082878 +4036082879 +4036083072 +4036083073 +4036083074 +4036919951 +4036924332 +4036924333 +4036924334 +4036924335 4036069551 4036069552 4036069553 diff --git a/level_0/f_utf/data/tests/bytesequences/control-all.txt b/level_0/f_utf/data/tests/bytesequences/control-all.txt index 4b9f4da..78d4695 
100644 --- a/level_0/f_utf/data/tests/bytesequences/control-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/control-all.txt @@ -63,3 +63,84 @@ 49821 49822 49823 +49837 +55424 +55425 +55426 +55427 +55428 +55429 +55452 +56221 +56463 +14721954 +14786702 +14844043 +14844044 +14844045 +14844046 +14844047 +14844074 +14844075 +14844076 +14844077 +14844078 +14844320 +14844321 +14844322 +14844323 +14844324 +14844326 +14844327 +14844328 +14844329 +14844330 +14844331 +14844332 +14844333 +14844334 +14844335 +15711167 +15712185 +15712186 +15712187 +4036068029 +4036068237 +4036202672 +4036202673 +4036202674 +4036202675 +4036202676 +4036202677 +4036202678 +4036202679 +4036202680 +4036202681 +4036202682 +4036202683 +4036202684 +4036202685 +4036202686 +4036202687 +4036202880 +4036202881 +4036202882 +4036202883 +4036202884 +4036202885 +4036202886 +4036202887 +4036202888 +4036202889 +4036202890 +4036202891 +4036202892 +4036202893 +4036202894 +4036202895 +4036202896 +4036202897 +4036202898 +4036202899 +4036202900 +4036202901 diff --git a/level_0/f_utf/data/tests/bytesequences/digit-all.txt b/level_0/f_utf/data/tests/bytesequences/digit-all.txt index cc08002..0d0aae1 100644 --- a/level_0/f_utf/data/tests/bytesequences/digit-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/digit-all.txt @@ -1422,45 +1422,6 @@ 14785454 14785455 14785456 -14845344 -14845345 -14845346 -14845347 -14845348 -14845349 -14845350 -14845351 -14845352 -14845353 -14845354 -14845355 -14845356 -14845357 -14845358 -14845359 -14845360 -14845361 -14845362 -14845363 -14845364 -14845365 -14845366 -14845367 -14845368 -14845369 -14845370 -14845371 -14845372 -14845373 -14845374 -14845375 -14845568 -14845569 -14845570 -14845573 -14845574 -14845575 -14845576 14909575 14909601 14909602 @@ -1640,3 +1601,43 @@ 4036137388 4036137389 4036137390 +4036083088 +4036083089 +4036083090 +4036083091 +4036083092 +4036083093 +4036083094 +4036083095 +4036083096 +4036083097 +4036856704 +4036856705 +4036856706 +4036856707 +4036856708 
+4036856709 +4036856710 +4036856711 +4036856712 +4036856713 +4036856714 +4036856715 +4036856716 +4036856717 +4036856718 +4036856719 +4036856720 +4036856721 +4036856722 +4036856723 +4036924336 +4036924337 +4036924338 +4036924339 +4036924340 +4036924341 +4036924342 +4036924343 +4036924344 +4036924345 diff --git a/level_0/f_utf/data/tests/bytesequences/emoji-all.txt b/level_0/f_utf/data/tests/bytesequences/emoji-all.txt index 2c40578..09ad40e 100644 --- a/level_0/f_utf/data/tests/bytesequences/emoji-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/emoji-all.txt @@ -829,6 +829,7 @@ 4036991893 4036991894 4036991895 +4036991900 4036991901 4036991902 4036991903 @@ -1112,6 +1113,9 @@ 4036995506 4036995507 4036995508 +4036995509 +4036995510 +4036995511 4036995512 4036995513 4036995514 @@ -1124,6 +1128,8 @@ 4036995716 4036995717 4036995718 +4036995719 +4036995720 4036995728 4036995729 4036995730 @@ -1153,6 +1159,9 @@ 4036995754 4036995755 4036995756 +4036995757 +4036995758 +4036995759 4036995760 4036995761 4036995762 @@ -1164,12 +1173,18 @@ 4036995768 4036995769 4036995770 +4036995771 +4036995772 +4036995773 +4036995775 4036995968 4036995969 4036995970 4036995971 4036995972 4036995973 +4036995982 +4036995983 4036995984 4036995985 4036995986 @@ -1180,6 +1195,8 @@ 4036995991 4036995992 4036995993 +4036995994 +4036995995 4036996000 4036996001 4036996002 @@ -1195,6 +1212,8 @@ 4036996020 4036996021 4036996022 +4036996023 +4036996024 14844297 14845090 14845113 diff --git a/level_0/f_utf/data/tests/codepoints/combining-all.txt b/level_0/f_utf/data/tests/codepoints/combining-all.txt index 5be5993..a0632ed 100644 --- a/level_0/f_utf/data/tests/codepoints/combining-all.txt +++ b/level_0/f_utf/data/tests/codepoints/combining-all.txt @@ -500,6 +500,7 @@ U+0CCC U+0CCD U+0CE2 U+0CE3 +U+0CF3 U+0D01 U+0D41 U+0D42 @@ -538,6 +539,7 @@ U+0EB8 U+0EB9 U+0EBB U+0EBC +U+0ECE U+0F18 U+0F19 U+0F35 @@ -656,6 +658,9 @@ U+10A3A U+10A3F U+10AE5 U+10AE6 +U+10EFD +U+10EFE +U+10EFF U+11001 U+11038 
U+11039 @@ -712,6 +717,27 @@ U+111BE U+111CA U+111CB U+111CC +U+11F00 +U+11F01 +U+11F02 +U+11F03 +U+11F34 +U+11F35 +U+11F36 +U+11F37 +U+11F38 +U+11F39 +U+11F3A +U+11F3E +U+11F3F +U+11F40 +U+11F41 +U+11F42 +U+1E08F +U+1E4EC +U+1E4ED +U+1E4EE +U+1E4EF U+1122F U+11230 U+11231 diff --git a/level_0/f_utf/data/tests/codepoints/control-all.txt b/level_0/f_utf/data/tests/codepoints/control-all.txt index 7d8df7b..e333f00 100644 --- a/level_0/f_utf/data/tests/codepoints/control-all.txt +++ b/level_0/f_utf/data/tests/codepoints/control-all.txt @@ -63,3 +63,84 @@ U+009C U+009D U+009E U+009F +U+00AD +U+0600 +U+0601 +U+0602 +U+0603 +U+0604 +U+0605 +U+061C +U+06DD +U+070F +U+08E2 +U+180E +U+200B +U+200C +U+200D +U+200E +U+200F +U+202A +U+202B +U+202C +U+202D +U+202E +U+2060 +U+2061 +U+2062 +U+2063 +U+2064 +U+2066 +U+2067 +U+2068 +U+2069 +U+206A +U+206B +U+206C +U+206D +U+206E +U+206F +U+FEFF +U+FFF9 +U+FFFA +U+FFFB +U+110BD +U+110CD +U+13430 +U+13431 +U+13432 +U+13433 +U+13434 +U+13435 +U+13436 +U+13437 +U+13438 +U+13439 +U+1343A +U+1343B +U+1343C +U+1343D +U+1343E +U+1343F +U+13440 +U+13441 +U+13442 +U+13443 +U+13444 +U+13445 +U+13446 +U+13447 +U+13448 +U+13449 +U+1344A +U+1344B +U+1344C +U+1344D +U+1344E +U+1344F +U+13450 +U+13451 +U+13452 +U+13453 +U+13454 +U+13455 diff --git a/level_0/f_utf/data/tests/codepoints/digit-all.txt b/level_0/f_utf/data/tests/codepoints/digit-all.txt index d8b334f..7c17dbc 100644 --- a/level_0/f_utf/data/tests/codepoints/digit-all.txt +++ b/level_0/f_utf/data/tests/codepoints/digit-all.txt @@ -1422,45 +1422,6 @@ U+1F10C U+16EE U+16EF U+16F0 -U+2160 -U+2161 -U+2162 -U+2163 -U+2164 -U+2165 -U+2166 -U+2167 -U+2168 -U+2169 -U+216A -U+216B -U+216C -U+216D -U+216E -U+216F -U+2170 -U+2171 -U+2172 -U+2173 -U+2174 -U+2175 -U+2176 -U+2177 -U+2178 -U+2179 -U+217A -U+217B -U+217C -U+217D -U+217E -U+217F -U+2180 -U+2181 -U+2182 -U+2185 -U+2186 -U+2187 -U+2188 U+3007 U+3021 U+3022 @@ -1640,3 +1601,43 @@ U+1246B U+1246C U+1246D U+1246E +U+11F50 +U+11F51 +U+11F52 
+U+11F53 +U+11F54 +U+11F55 +U+11F56 +U+11F57 +U+11F58 +U+11F59 +U+1D2C0 +U+1D2C1 +U+1D2C2 +U+1D2C3 +U+1D2C4 +U+1D2C5 +U+1D2C6 +U+1D2C7 +U+1D2C8 +U+1D2C9 +U+1D2CA +U+1D2CB +U+1D2CC +U+1D2CD +U+1D2CE +U+1D2CF +U+1D2D0 +U+1D2D1 +U+1D2D2 +U+1D2D3 +U+1E4F0 +U+1E4F1 +U+1E4F2 +U+1E4F3 +U+1E4F4 +U+1E4F5 +U+1E4F6 +U+1E4F7 +U+1E4F8 +U+1E4F9 diff --git a/level_0/f_utf/data/tests/codepoints/emoji-all.txt b/level_0/f_utf/data/tests/codepoints/emoji-all.txt index 3a0a325..6fd5e16 100644 --- a/level_0/f_utf/data/tests/codepoints/emoji-all.txt +++ b/level_0/f_utf/data/tests/codepoints/emoji-all.txt @@ -829,6 +829,7 @@ U+1F6D2 U+1F6D5 U+1F6D6 U+1F6D7 +U+1F6DC U+1F6DD U+1F6DE U+1F6DF @@ -1112,6 +1113,9 @@ U+1FA71 U+1FA72 U+1FA73 U+1FA74 +U+1FA75 +U+1FA76 +U+1FA77 U+1FA78 U+1FA79 U+1FA7A @@ -1124,6 +1128,8 @@ U+1FA83 U+1FA84 U+1FA85 U+1FA86 +U+1FA87 +U+1FA88 U+1FA90 U+1FA91 U+1FA92 @@ -1153,6 +1159,9 @@ U+1FAA9 U+1FAAA U+1FAAB U+1FAAC +U+1FAAD +U+1FAAE +U+1FAAF U+1FAB0 U+1FAB1 U+1FAB2 @@ -1164,12 +1173,18 @@ U+1FAB7 U+1FAB8 U+1FAB9 U+1FABA +U+1FABB +U+1FABC +U+1FABD +U+1FABF U+1FAC0 U+1FAC1 U+1FAC2 U+1FAC3 U+1FAC4 U+1FAC5 +U+1FACE +U+1FACF U+1FAD0 U+1FAD1 U+1FAD2 @@ -1195,6 +1210,8 @@ U+1FAF3 U+1FAF4 U+1FAF5 U+1FAF6 +U+1FAF7 +U+1FAF8 U+2049 U+2122 U+2139 diff --git a/level_0/f_utf/data/tests/values/digit-all.txt b/level_0/f_utf/data/tests/values/digit-all.txt index c33c677..2896039 100644 --- a/level_0/f_utf/data/tests/values/digit-all.txt +++ b/level_0/f_utf/data/tests/values/digit-all.txt @@ -1422,45 +1422,6 @@ 17 18 19 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -50 -100 -500 -1000 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -50 -100 -500 -1000 -1000 -5000 -10000 -6 -50 -50000 -100000 0 1 2 @@ -1640,3 +1601,43 @@ 7 8 9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/level_3/byte_dump/c/byte_dump.h b/level_3/byte_dump/c/byte_dump.h index 9fb1233..cd798c7 100644 --- a/level_3/byte_dump/c/byte_dump.h 
+++ b/level_3/byte_dump/c/byte_dump.h @@ -5,7 +5,7 @@ * API Version: 0.7 * Licenses: lgpl-2.1-or-later * - * This is intendend to support Unicode 14.0. + * This is intended to support Unicode 15.0. * * When using "text" mode, this program attempts to translate UTF-8 sequences such that certain codes don't cause printing problems. * There may be cases where there are unknown codes that get printed and the invalid UTF-8 marker may be displayed not by this program but instead by the shell or some other program. diff --git a/level_3/utf8/c/utf8.h b/level_3/utf8/c/utf8.h index ffd1a54..efbcbaa 100644 --- a/level_3/utf8/c/utf8.h +++ b/level_3/utf8/c/utf8.h @@ -5,7 +5,7 @@ * API Version: 0.7 * Licenses: lgpl-2.1-or-later * - * This is intendend to support Unicode 14.0. + * This is intended to support Unicode 15.0. * * This is a program for handling basic UTF-8 related conversions. * - Convert from UTF-8 character to bytesequence. -- 1.8.3.1