From d00a6090c1ab35ba44fc53d12abc45e97da31d93 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Thu, 16 Jun 2022 22:56:34 -0500 Subject: [PATCH] Update: Unit tests for f_utf and related changes and bug fixes. Fix several problems exposed by unit tests. Fix several unit tests that did not work as expected due to problems with the data files. At some point I seem to have diverged from always ensuring that the f_utf_char_t is always big-endian. I probably got so lost in handling the differences between big and little endian that I ended up making the f_utf_char_t act little-endian in cases where the host is little-endian. The f_utf_char_t must always be big-endian. However, there are cases where the big and little endian behavior must be processed. Break up the macros into having "_be" and "_le" to make this possible. The iscntrl() check return value needs to be explicitly handled to ensure that only F_false or F_true is returned. This is already fixed in one function. Apply the existing fix to the other function. The f_utf_char_t should be seen as a single character rather than a stream of bytes. Unit tests now treat any non-zero value after the designated width as invalid. The is-valid checking code now tests for this invalid case. The unit tests are improved. Test for F_true and F_false rather than calling assert_true() and assert_false(). Error bits and other status codes were previously passing when they should fail due to the use of assert_true() and assert_false(). This commit changes the byte order of the f_utf_char_t. This will break code such as the code used in the utf8 program. A follow-up commit is necessary to fix any byte order problems. 
--- level_0/f_utf/c/private-utf_combining.c | 8 +- level_0/f_utf/c/private-utf_control.c | 2 +- level_0/f_utf/c/private-utf_digit.c | 8 +- level_0/f_utf/c/private-utf_emoji.c | 134 ++--- level_0/f_utf/c/private-utf_phonetic.c | 9 +- level_0/f_utf/c/private-utf_punctuation.c | 95 +++- level_0/f_utf/c/private-utf_subscript.c | 2 +- level_0/f_utf/c/private-utf_superscript.c | 4 +- level_0/f_utf/c/private-utf_symbol.c | 501 ++++++++++++++--- level_0/f_utf/c/private-utf_valid.c | 15 +- level_0/f_utf/c/utf/common.h | 137 +++-- level_0/f_utf/c/utf/is.c | 6 +- level_0/f_utf/c/utf/is_character.c | 7 +- level_0/f_utf/c/utf/is_character.h | 2 + .../data/tests/bytesequences/combining-all.txt | 541 ------------------ .../f_utf/data/tests/bytesequences/emoji-all.txt | 617 --------------------- .../data/tests/bytesequences/superscript-all.txt | 314 +++++++++-- .../f_utf/data/tests/codepoints/combining-all.txt | 541 ------------------ level_0/f_utf/data/tests/codepoints/emoji-all.txt | 617 --------------------- level_0/f_utf/tests/unit/c/data-utf.c | 31 +- level_0/f_utf/tests/unit/c/data-utf.h | 4 +- .../unit/c/test-utf-character_is_alphabetic.c | 6 +- .../tests/unit/c/test-utf-character_is_combining.c | 6 +- .../tests/unit/c/test-utf-character_is_control.c | 6 +- .../tests/unit/c/test-utf-character_is_digit.c | 6 +- .../tests/unit/c/test-utf-character_is_emoji.c | 7 +- .../tests/unit/c/test-utf-character_is_numeric.c | 6 +- .../tests/unit/c/test-utf-character_is_phonetic.c | 6 +- .../tests/unit/c/test-utf-character_is_private.c | 6 +- .../unit/c/test-utf-character_is_punctuation.c | 6 +- .../tests/unit/c/test-utf-character_is_subscript.c | 6 +- .../unit/c/test-utf-character_is_superscript.c | 6 +- .../tests/unit/c/test-utf-character_is_surrogate.c | 6 +- .../tests/unit/c/test-utf-character_is_symbol.c | 6 +- .../tests/unit/c/test-utf-character_is_valid.c | 87 +-- .../unit/c/test-utf-character_is_whitespace.c | 6 +- .../tests/unit/c/test-utf-character_is_wide.c | 6 +- 
.../tests/unit/c/test-utf-character_is_word.c | 12 +- .../unit/c/test-utf-character_is_zero_width.c | 6 +- .../f_utf/tests/unit/c/test-utf-is_alphabetic.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_combining.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_control.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_digit.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_emoji.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_numeric.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_private.c | 4 +- .../f_utf/tests/unit/c/test-utf-is_punctuation.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_subscript.c | 4 +- .../f_utf/tests/unit/c/test-utf-is_superscript.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_surrogate.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_symbol.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_valid.c | 18 +- .../f_utf/tests/unit/c/test-utf-is_whitespace.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_wide.c | 4 +- level_0/f_utf/tests/unit/c/test-utf-is_word.c | 8 +- .../f_utf/tests/unit/c/test-utf-is_zero_width.c | 4 +- 57 files changed, 1132 insertions(+), 2749 deletions(-) diff --git a/level_0/f_utf/c/private-utf_combining.c b/level_0/f_utf/c/private-utf_combining.c index 16d3940..e7230b6 100644 --- a/level_0/f_utf/c/private-utf_combining.c +++ b/level_0/f_utf/c/private-utf_combining.c @@ -212,7 +212,7 @@ extern "C" { } // Gurmukhi: U+0A01 to U+0A03. - if (character >= 0xe0a88100 && character <= 0xe0a88230) { + if (character >= 0xe0a88100 && character <= 0xe0a88300) { return F_true; } @@ -754,12 +754,12 @@ extern "C" { return F_true; } - // Saurashtra: U+A8C4. - if (character == 0xeaa38400) { + // Saurashtra: U+A8C4, U+A8C5. + if (character == 0xeaa38400 || character == 0xeaa38500) { return F_true; } - // Devanagari Extended: U+A6F0 to U+A8F1. + // Devanagari Extended: U+A8E0 to U+A8F1. 
if (character >= 0xeaa3a000 && character <= 0xeaa3b100) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_control.c b/level_0/f_utf/c/private-utf_control.c index 0d519a6..0e8fda5 100644 --- a/level_0/f_utf/c/private-utf_control.c +++ b/level_0/f_utf/c/private-utf_control.c @@ -102,7 +102,7 @@ extern "C" { return F_true; } - // Shothand Format Controls: U+1BCA0 to U+1BCA3. + // Shorthand Format Controls: U+1BCA0 to U+1BCA3. if (character >= 0xf09bb2a0 && character <= 0xf09bb2a3) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_digit.c b/level_0/f_utf/c/private-utf_digit.c index f794ac2..5b29d9f 100644 --- a/level_0/f_utf/c/private-utf_digit.c +++ b/level_0/f_utf/c/private-utf_digit.c @@ -97,18 +97,18 @@ extern "C" { if (character >= 0xe0bca000 && character <= 0xe0bca900) { return F_true; } + } + else if (macro_f_utf_char_t_to_char_1(character) == 0xe1) { // Myanmar: U+1040 to U+1049. - if (character >= 0xe0818000 && character <= 0xe0818900) { + if (character >= 0xe1818000 && character <= 0xe1818900) { return F_true; } // Myanmar (Shan): U+1090 to U+1099. - if (character >= 0xe0829000 && character <= 0xe0829900) { + if (character >= 0xe1829000 && character <= 0xe1829900) { return F_true; } - } - else if (macro_f_utf_char_t_to_char_1(character) == 0xe1) { // Khmer: U+17E0 to U+17E9. if (character >= 0xe19fa000 && character <= 0xe19fa900) { diff --git a/level_0/f_utf/c/private-utf_emoji.c b/level_0/f_utf/c/private-utf_emoji.c index 0511052..994183b 100644 --- a/level_0/f_utf/c/private-utf_emoji.c +++ b/level_0/f_utf/c/private-utf_emoji.c @@ -29,7 +29,7 @@ extern "C" { } // Letterlike Symbols: U+2122, U+2139 - if (character == 0xe2818900 || character == 0xe284b900) { + if (character == 0xe284a200 || character == 0xe284b900) { return F_true; } @@ -268,243 +268,253 @@ extern "C" { return F_true; } - // U+1F0CF to U+1F171. + // Playing Cards: U+1F0CF to U+1F171. 
if (character >= 0xf09f8084 && character <= 0xf09f85b1) { return F_true; } - // U+1F17E, U+1F17F, U+1F18E. + // Enclosed Alphanumeric Supplement: U+1F17E, U+1F17F, U+1F18E. if (character == 0xf09f85be || character == 0xf09f85bf || character == 0xf09f868e) { return F_true; } - // U+1F191 to U+1F19A. + // Enclosed Alphanumeric Supplement: U+1F191 to U+1F19A. if (character >= 0xf09f8691 && character <= 0xf09f869a) { return F_true; } - // U+1F201, U+1F202, U+1F21A, U+1F22F. + // Enclosed Alphanumeric Supplement: U+1F1E6. + if (character == 0xf09f87a6) { + return F_true; + } + + // Enclosed Ideographic Supplement: U+1F201, U+1F202, U+1F21A, U+1F22F. if (character == 0xf09f8881 || character == 0xf09f8882 || character == 0xf09f889a || character == 0xf09f88af) { return F_true; } - // U+1F232 to U+1F23A. + // Enclosed Ideographic Supplement: U+1F232 to U+1F23A. if (character >= 0xf09f88b2 && character <= 0xf09f88ba) { return F_true; } - // U+1F250, U+1F251. + // Enclosed Ideographic Supplement: U+1F250, U+1F251. if (character == 0xf09f8990 || character == 0xf09f8991) { return F_true; } - // U+1F300 to U+1F321. + // Miscellaneous Symbols and Pictographs: U+1F300 to U+1F321. if (character >= 0xf09f8c80 && character <= 0xf09f8ca1) { return F_true; } - // U+1F324 to U+1F393. + // Miscellaneous Symbols and Pictographs: U+1F324 to U+1F393. if (character >= 0xf09f8ca4 && character <= 0xf09f8e93) { return F_true; } - // U+1F396, U+1F397. + // Miscellaneous Symbols and Pictographs: U+1F396, U+1F397. if (character == 0xf09f8e96 || character == 0xf09f8e97) { return F_true; } - // U+1F399 to U+1F39B. + // Miscellaneous Symbols and Pictographs: U+1F399 to U+1F39B. if (character >= 0xf09f8e99 && character <= 0xf09f8e9b) { return F_true; } - // U+1F39E to U+1F3F0. + // Miscellaneous Symbols and Pictographs: U+1F39E to U+1F3F0. if (character >= 0xf09f8e9e && character <= 0xf09f8fb0) { return F_true; } - // U+1F3F3 to U+1F3F5. + // Miscellaneous Symbols and Pictographs: U+1F3F3 to U+1F3F5. 
if (character >= 0xf09f8fb3 && character <= 0xf09f8fb5) { return F_true; } - // U+1F3F7 to U+1F4FD. + // Miscellaneous Symbols and Pictographs: U+1F3F7 to U+1F4FD. if (character >= 0xf09f8fb7 && character <= 0xf09f93bd) { return F_true; } - // U+1F4FF to U+1F53D. + // Miscellaneous Symbols and Pictographs: U+1F4FF to U+1F53D. if (character >= 0xf09f93bf && character <= 0xf09f94bd) { return F_true; } - // U+1F549 to U+1F54E. + // Miscellaneous Symbols and Pictographs: U+1F549 to U+1F54E. if (character >= 0xf09f9589 && character <= 0xf09f958e) { return F_true; } - // U+1F550 to U+1F567. + // Miscellaneous Symbols and Pictographs: U+1F550 to U+1F567. if (character >= 0xf09f9590 && character <= 0xf09f95a7) { return F_true; } - // U+1F56F, U+1F570. + // Miscellaneous Symbols and Pictographs: U+1F56F, U+1F570. if (character == 0xf09f95af || character == 0xf09f95b0) { return F_true; } - // U+1F573 to U+1F57A. + // Miscellaneous Symbols and Pictographs: U+1F573 to U+1F57A. if (character >= 0xf09f95b3 && character <= 0xf09f95ba) { return F_true; } - // U+1F587. + // Miscellaneous Symbols and Pictographs: U+1F587. if (character == 0xf09f9687) { return F_true; } - // U+1F58A to U+1F58D. + // Miscellaneous Symbols and Pictographs: U+1F58A to U+1F58D. if (character >= 0xf09f968a && character <= 0xf09f968d) { return F_true; } - // U+1F590, U+1F595, U+1F596, U+1F5A4. + // Miscellaneous Symbols and Pictographs: U+1F590, U+1F595, U+1F596, U+1F5A4. if (character == 0xf09f9690 || character == 0xf09f9695 || character == 0xf09f9696 || character == 0xf09f96a4) { return F_true; } - // U+1F5A5, U+1F5A8, U+1F5B1, U+1F5B2. + // Miscellaneous Symbols and Pictographs: U+1F5A5, U+1F5A8, U+1F5B1, U+1F5B2. if (character == 0xf09f96a5 || character == 0xf09f96a8 || character == 0xf09f96b1 || character == 0xf09f96b2) { return F_true; } - // U+1F5BC. + // Miscellaneous Symbols and Pictographs: U+1F5BC. if (character == 0xf09f96bc) { return F_true; } - // U+1F5C2 to U+1F5C4. 
+ // Miscellaneous Symbols and Pictographs: U+1F5C2 to U+1F5C4. if (character >= 0xf09f9782 && character <= 0xf09f9784) { return F_true; } - // U+1F5D1 to U+1F5D3. + // Miscellaneous Symbols and Pictographs: U+1F5D1 to U+1F5D3. if (character >= 0xf09f9791 && character <= 0xf09f9793) { return F_true; } - // U+1F5DC to U+1F5DE. + // Miscellaneous Symbols and Pictographs: U+1F5DC to U+1F5DE. if (character >= 0xf09f979c && character <= 0xf09f979e) { return F_true; } - // U+1F5E1, U+1F5E3, U+1F5E8, U+1F5EF. + // Miscellaneous Symbols and Pictographs: U+1F5E1, U+1F5E3, U+1F5E8, U+1F5EF. if (character == 0xf09f97a1 || character == 0xf09f97a3 || character == 0xf09f97a8 || character == 0xf09f97af) { return F_true; } - // U+1F5F3. + // Miscellaneous Symbols and Pictographs: U+1F5F3. if (character == 0xf09f97b3) { return F_true; } - // U+1F5FA to U+1F6C5. + // Miscellaneous Symbols and Pictographs: U+1F5FA to U+1F6C5. if (character >= 0xf09f97ba && character <= 0xf09f9b85) { return F_true; } - // U+1F6CB to U+1F6D2. + // Transport and Map Symbols: U+1F6CB to U+1F6D2. if (character >= 0xf09f9b8b && character <= 0xf09f9b92) { return F_true; } - // U+1F6D5 to U+1F6D7. + // Transport and Map Symbols: U+1F6D5 to U+1F6D7. if (character >= 0xf09f9b95 && character <= 0xf09f9b97) { return F_true; } - // U+1F6E0 to U+1F6E5. + // Transport and Map Symbols: U+1F6DD to U+1F6DF. + if (character >= 0xf09f9b9d && character <= 0xf09f9b9f) { + return F_true; + } + + // Transport and Map Symbols: U+1F6E0 to U+1F6E5. if (character >= 0xf09f9ba0 && character <= 0xf09f9ba5) { return F_true; } - // U+1F6E9, U+1F6EB, U+1F6EC, U+1F6F0. + // Transport and Map Symbols: U+1F6E9, U+1F6EB, U+1F6EC, U+1F6F0. if (character == 0xf09f9ba9 || character == 0xf09f9bab || character == 0xf09f9bac || character == 0xf09f9bb0) { return F_true; } - // U+1F6F3 to U+1F6FC. + // Transport and Map Symbols: U+1F6F3 to U+1F6FC. 
if (character >= 0xf09f9bb3 && character <= 0xf09f9bbc) { return F_true; } - // U+1F7E0 to U+1F7EB. + // Geometric Shapes Extended: U+1F7E0 to U+1F7EB. if (character >= 0xf09f9fa0 && character <= 0xf09f9fab) { return F_true; } - // U+1F90C to U+1F93A. - if (character >= 0xf09fa48c && character <= 0xf09fa4ba) { + // Geometric Shapes Extended: U+1F7F0. + if (character == 0xf09f9fb0) { return F_true; } - // U+1F93C to U+1F945. - if (character >= 0xf09fa4bc && character <= 0xf09fa585) { + // Supplemental Symbols and Pictographs: U+1F90C to U+1F93A. + if (character >= 0xf09fa48c && character <= 0xf09fa4ba) { return F_true; } - // U+1F947 to U+1F978. - if (character >= 0xf09fa587 && character <= 0xf09fa5b8) { + // Supplemental Symbols and Pictographs: U+1F93C to U+1F945. + if (character >= 0xf09fa4bc && character <= 0xf09fa585) { return F_true; } - // U+1F97A to U+1F9CB. - if (character >= 0xf09fa5ba && character <= 0xf09fa78b) { + // Supplemental Symbols and Pictographs to Symbols and Pictographs Extended-A: U+1F947 to U+U+1FA74. + if (character >= 0xf09fa587 && character <= 0xf09fa9b4) { return F_true; } - // U+1F9CD to U+1FA74. - if (character >= 0xf09fa78d && character <= 0xf09fa9b4) { + // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA74. + if (character >= 0xf09fa9b0 && character <= 0xf09fa9b4) { return F_true; } - // U+1FA70 to U+1FA74. - if (character >= 0xf09fa9b0 && character <= 0xf09fa9b4) { + // Symbols and Pictographs Extended-A: U+1FA78 to U+1FA7C. + if (character >= 0xf09fa9b8 && character <= 0xf09fa9bc) { return F_true; } - // U+1FA78 to U+1FA7A. - if (character >= 0xf09fa9b8 && character <= 0xf09fa9ba) { + // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA86. + if (character >= 0xf09faa80 && character <= 0xf09faa86) { return F_true; } - // U+1FA80 to U+1FA86. - if (character >= 0xf09faa80 && character <= 0xf09faa86) { + // Symbols and Pictographs Extended-A: U+1FA90 to U+1FAAC. 
+ if (character >= 0xf09faa90 && character <= 0xf09faaac) { return F_true; } - // U+1FA90 to U+1FAA8. - if (character >= 0xf09faa90 && character <= 0xf09faaa8) { + // Symbols and Pictographs Extended-A: U+1FAB0 to U+1FABA. + if (character >= 0xf09faab0 && character <= 0xf09faaba) { return F_true; } - // U+1FAB0 to U+1FAB6. - if (character >= 0xf09faab0 && character <= 0xf09faab6) { + // Symbols and Pictographs Extended-A: U+1FAC0 to U+1FAC5. + if (character >= 0xf09fab80 && character <= 0xf09fab85) { return F_true; } - // U+1FAC0 to U+1FAC2. - if (character >= 0xf09fab80 && character <= 0xf09fab82) { + // Symbols and Pictographs Extended-A: U+1FAD0 to U+1FAD9. + if (character >= 0xf09fab90 && character <= 0xf09fab99) { return F_true; } - // U+1FAD0 to U+1FAD6. - if (character >= 0xf09fab90 && character <= 0xf09fab96) { + // Symbols and Pictographs Extended-A: U+1FAE0 to U+1FAE7. + if (character >= 0xf09faba0 && character <= 0xf09faba7) { return F_true; } - // U+1FAF6. - if (character == 0xf09fabb6) { + // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF6. + if (character >= 0xf09fabb0 && character <= 0xf09fabb6) { return F_true; } } diff --git a/level_0/f_utf/c/private-utf_phonetic.c b/level_0/f_utf/c/private-utf_phonetic.c index f34b24c..2c69c3f 100644 --- a/level_0/f_utf/c/private-utf_phonetic.c +++ b/level_0/f_utf/c/private-utf_phonetic.c @@ -11,13 +11,8 @@ extern "C" { if (macro_f_utf_char_t_width_is(character) == 3) { - // Phonetic Extensions: U+1D00 to U+1D7F. - if (character >= 0xe1b48000 && character <= 0xe1b5bf00) { - return F_true; - } - - // Phonetic Extensions Supplement: U+1D80 to U+1DBF. - if (character >= 0xe1b6bf00 && character <= 0xe1b6bf00) { + // Phonetic Extensions to Phonetic Extensions Supplement: U+1D00 to U+1DBF. 
+ if (character >= 0xe1b48000 && character <= 0xe1b6bf00) { return F_true; } } diff --git a/level_0/f_utf/c/private-utf_punctuation.c b/level_0/f_utf/c/private-utf_punctuation.c index 8137e01..39b7008 100644 --- a/level_0/f_utf/c/private-utf_punctuation.c +++ b/level_0/f_utf/c/private-utf_punctuation.c @@ -13,48 +13,53 @@ extern "C" { if (macro_f_utf_char_t_to_char_1(character) == 0xc2) { - // Latin-1 Supplement: U+00A1, U+00A7, U+00B6, U+00B7. - if (character == 0xc2a10000 || character == 0xc2a70000 || character == 0xc2b60000 || character == 0xc2b70000) { + // Latin-1 Supplement: U+00A1, U+00A7, U+00AB, U+00B6. + if (character == 0xc2a10000 || character == 0xc2a70000 || character == 0xc2ab0000 || character == 0xc2b60000) { return F_true; } - // Latin-1 Supplement: U+00BF - if (character == 0xc2bf0000) { + // Latin-1 Supplement: U+00B7, U+00BB, U+00BF. + if (character == 0xc2b70000 || character == 0xc2bb0000 || character == 0xc2bf0000) { return F_true; } } else if (macro_f_utf_char_t_to_char_1(character) == 0xcd) { - // Greek and Coptic: U+037E + // Greek and Coptic: U+037E. if (character == 0xcdbe0000) { return F_true; } } else if (macro_f_utf_char_t_to_char_1(character) == 0xce) { - // Greek and Coptic: U+0387 + // Greek and Coptic: U+0387. if (character == 0xce870000) { return F_true; } } else if (macro_f_utf_char_t_to_char_1(character) == 0xd5) { - // Armenian: U+055A to U+055 + // Armenian: U+055A to U+055F. if (character >= 0xd59a0000 && character <= 0xd59f0000) { return F_true; } } else if (macro_f_utf_char_t_to_char_1(character) == 0xd6) { - // Armenian: U+0589, U+058A + // Armenian: U+0589, U+058A. if (character == 0xd6890000 || character == 0xd68a0000) { return F_true; } + + // Hebrew: U+05BE. + if (character == 0xd6be0000) { + return F_true; + } } else if (macro_f_utf_char_t_to_char_1(character) == 0xd7) { - // Hebrew: U+05BE, U+05C0, U+05C3, U+05C6. 
- if (character == 0xd6be0000 || character == 0xd7800000 || character == 0xd7830000 || character == 0xd7860000) { + // Hebrew: U+05C0, U+05C3, U+05C6. + if (character == 0xd7800000 || character == 0xd7830000 || character == 0xd7860000) { return F_true; } @@ -69,8 +74,6 @@ extern "C" { if (character == 0xd8890000 || character == 0xd88a0000 || character == 0xd88c0000 || character == 0xd88d0000) { return F_true; } - } - else if (macro_f_utf_char_t_to_char_1(character) == 0xd9) { // Arabic: U+061B. if (character == 0xd89b0000) { @@ -78,14 +81,18 @@ extern "C" { } // Arabic: U+061D to U+061F. - if (character >= 0xd89d0000 && character == 0xd89f0000) { + if (character >= 0xd89d0000 && character <= 0xd89f0000) { return F_true; } + } + else if (macro_f_utf_char_t_to_char_1(character) == 0xdb) { // Arabic: U+06D4. if (character == 0xdb940000) { return F_true; } + } + else if (macro_f_utf_char_t_to_char_1(character) == 0xd9) { // Arabic: U+066A to U+066D. if (character >= 0xd9aa0000 && character <= 0xd9ad0000) { @@ -211,8 +218,8 @@ extern "C" { return F_true; } - // Ogham: U+169B. - if (character == 0xe19a9b00) { + // Ogham: U+169B, U+169C. + if (character == 0xe19a9b00 || character == 0xe19a9c00) { return F_true; } @@ -256,8 +263,8 @@ extern "C" { return F_true; } - // Tai Tham: U+1AA9 to U+1AAD. - if (character >= 0xe1aaa900 && character <= 0xe1aaad00) { + // Tai Tham: U+1AA8 to U+1AAD. + if (character >= 0xe1aaa800 && character <= 0xe1aaad00) { return F_true; } @@ -266,13 +273,18 @@ extern "C" { return F_true; } + // Balinese: U+1B7D to U+1B7E. + if (character == 0xe1adbd00 || character == 0xe1adbe00) { + return F_true; + } + // Batak: U+1BFC to U+1BFF. if (character >= 0xe1afbc00 && character <= 0xe1afbf00) { return F_true; } // Lepcha: U+1C3B to U+1C3F. 
- if (character >= 0xe1b0bb00 && character <= 0xe1afbf00) { + if (character >= 0xe1b0bb00 && character <= 0xe1b0bf00) { return F_true; } @@ -318,8 +330,8 @@ extern "C" { return F_true; } - // Miscellaneous Technical: U+232A. - if (character == 0xe28caa00) { + // Miscellaneous Technical: U+2329, U+232A. + if (character == 0xe28ca900 || character == 0xe28caa00) { return F_true; } @@ -378,8 +390,8 @@ extern "C" { return F_true; } - // Supplemental Punctuation: U+2E52. - if (character == 0xe2b99200) { + // Supplemental Punctuation: U+2E52 to U+2E5D. + if (character >= 0xe2b99200 && character <= 0xe2b99d00) { return F_true; } } @@ -489,8 +501,8 @@ extern "C" { } else if (macro_f_utf_char_t_to_char_1(character) == 0xef) { - // Alphabetic Presentation Forms-A: U+FD3E. - if (character == 0xefb4be00) { + // Alphabetic Presentation Forms-A: U+FD3E, U+FD3F. + if (character == 0xefb4be00 || character == 0xefb4bf00) { return F_true; } @@ -592,7 +604,7 @@ extern "C" { } // Kharoshthi: U+10A50 to U+10A58. - if (character >= 0xf090a990 && character <= 0xd802de58) { + if (character >= 0xf090a990 && character <= 0xf090a998) { return F_true; } @@ -607,7 +619,7 @@ extern "C" { } // Avestan: U+10B39. - if (character == 0xf090a9bf) { + if (character == 0xf090acb9) { return F_true; } @@ -630,6 +642,11 @@ extern "C" { if (character >= 0xf090bd95 && character <= 0xf090bd99) { return F_true; } + + // Old Uyghur: U+10F86 to U+10F89. + if (character >= 0xf090be86 && character <= 0xf090be89) { + return F_true; + } } else if (macro_f_utf_char_t_to_char_2(character) == 0x91) { @@ -769,6 +786,11 @@ extern "C" { if (character >= 0xf09291b0 && character <= 0xf09291b4) { return F_true; } + + // Cypro-Minoan: U+12FF1, U+12FF1. 
+ if (character == 0xf092bfb1 || character == 0xf092bfb2) { + return F_true; + } } else if (macro_f_utf_char_t_to_char_2(character) == 0x96) { @@ -817,6 +839,27 @@ extern "C" { return F_true; } } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9b) { + + // Duployan: U+1BC9F. + if (character == 0xf09bb29f) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9d) { + + // Sutton SignWriting: U+1DA87 to U+1DA8B. + if (character >= 0xf09daa87 && character <= 0xf09daa8b) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9e) { + + // Adlam: U+1E95E to U+1E95F. + if (character >= 0xf09ea59e && character <= 0xf09ea59f) { + return F_true; + } + } } return F_false; diff --git a/level_0/f_utf/c/private-utf_subscript.c b/level_0/f_utf/c/private-utf_subscript.c index 9309649..6e3b435 100644 --- a/level_0/f_utf/c/private-utf_subscript.c +++ b/level_0/f_utf/c/private-utf_subscript.c @@ -22,7 +22,7 @@ extern "C" { } // Superscripts and Subscripts: U+2090 to U+209C. - if (character >= 0xe2828e00 && character <= 0xe2829000) { + if (character >= 0xe2829000 && character <= 0xe2829c00) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_superscript.c b/level_0/f_utf/c/private-utf_superscript.c index afe0283..10d7886 100644 --- a/level_0/f_utf/c/private-utf_superscript.c +++ b/level_0/f_utf/c/private-utf_superscript.c @@ -78,7 +78,7 @@ extern "C" { } // Unified Canadian Aboriginal Syllabics: U+14EA, U+14EB. - if (character == 0xe1939200 || character == 0xe193aa00) { + if (character == 0xe193aa00 || character == 0xe193ab00) { return F_true; } @@ -143,7 +143,7 @@ extern "C" { else if (macro_f_utf_char_t_to_char_1(character) == 0xe2) { // Superscripts and Subscripts: U+2070, U+2071. 
- if (character == 0xe2828000 || character == 0xe2828100) { + if (character == 0xe281b000 || character == 0xe281b100) { return F_true; } diff --git a/level_0/f_utf/c/private-utf_symbol.c b/level_0/f_utf/c/private-utf_symbol.c index e2e87bb..5497abc 100644 --- a/level_0/f_utf/c/private-utf_symbol.c +++ b/level_0/f_utf/c/private-utf_symbol.c @@ -28,8 +28,15 @@ extern "C" { return F_true; } - // Latin-1 Supplement: U+00B8, U+00D7, U+00F7. - if (character == 0xc2b80000 || character == 0xc3970000 || character == 0xc3b70000) { + // Latin-1 Supplement: U+00B8. + if (character == 0xc2b80000) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_1(character) == 0xc3) { + + // Latin-1 Supplement: U+00D7, U+00F7. + if (character == 0xc3970000 || character == 0xc3b70000) { return F_true; } } @@ -102,17 +109,10 @@ extern "C" { return F_true; } } - else if (macro_f_utf_char_t_to_char_1(character) == 0xd9) { - - // Arabic: U+06DE. - if (character == 0xdb9e0000) { - return F_true; - } - } else if (macro_f_utf_char_t_to_char_1(character) == 0xdb) { - // Arabic: U+06E9, U+06FD, U+06FE. - if (character == 0xdba90000 || character == 0xdbbd0000 || character == 0xdbbe0000) { + // Arabic: U+06DE, U+06E9, U+06FD, U+06FE. + if (character == 0xdb9e0000 || character == 0xdba90000 || character == 0xdbbd0000 || character == 0xdbbe0000) { return F_true; } } @@ -290,13 +290,13 @@ extern "C" { return F_true; } - // Superscripts and Subscripts: U+207A, U+207C. - if (character == 0xe281ba00 || character == 0xe281bc00) { + // Superscripts and Subscripts: U+207A to U+207C. + if (character >= 0xe281ba00 && character <= 0xe281bc00) { return F_true; } - // Superscripts and Subscripts: U+208A, U+208C. - if (character == 0xe2828a00 || character == 0xe2828c00) { + // Superscripts and Subscripts: U+208A to U+208C. + if (character >= 0xe2828a00 && character <= 0xe2828c00) { return F_true; } @@ -370,68 +370,33 @@ extern "C" { return F_true; } - // Letterlike Symbols: U+213A, U+213B, U+214A, U+214C. 
- if (character == 0xe284ba00 || character == 0xe284bb00 || character == 0xe2858a00 || character == 0xe2858c00) { + // Letterlike Symbols: U+213A, U+213B. + if (character == 0xe284ba00 || character == 0xe284bb00) { return F_true; } - // Letterlike Symbols: U+214D, U+214F. - if (character == 0xe2858d00 || character == 0xe2858f00) { - return F_true; - } - - // Number Forms: U+218A, U+218B. - if (character == 0xe2868a00 || character == 0xe2868b00) { - return F_true; - } - - // Arrows: U+2195 to U+2199. - if (character >= 0xe2869500 && character <= 0xe2869900) { - return F_true; - } - - // Arrows: U+219C to U+219F. - if (character >= 0xe2869c00 && character <= 0xe2869f00) { - return F_true; - } - - // Arrows: U+21A1, U+21A2, U+21A4, U+21A5. - if (character == 0xe286a100 || character == 0xe286a200 || character == 0xe286a400 || character == 0xe286a500) { - return F_true; - } - - // Arrows: U+21A7 to U+21AD. - if (character >= 0xe286a700 && character <= 0xe286ad00) { - return F_true; - } - - // Arrows: U+21AF. - if (character == 0xe286af00) { - return F_true; - } - - // Arrows: U+21B0 to U+21CD. - if (character >= 0xe286af00 && character <= 0xe2878d00) { + // Letterlike Symbols: U+2140 to U+2144. + if (character >= 0xe2858000 && character <= 0xe2858400) { return F_true; } - // Arrows: U+21D0, U+21D1, U+21D3. - if (character == 0xe2879000 || character == 0xe2879100 || character == 0xe2879300) { + // Letterlike Symbols: U+214A to U+214D. + if (character >= 0xe2858a00 && character <= 0xe2858d00) { return F_true; } - // Arrows: U+21D5 to U+21F3. - if (character >= 0xe2879500 && character <= 0xe287b300) { + // Letterlike Symbols: U+214F. + if (character == 0xe2858f00) { return F_true; } - // Miscellaneous Technical: U+2300 to U+2307. - if (character >= 0xe28c8000 && character == 0xe28c8700) { + // Number Forms: U+218A, U+218B. + if (character == 0xe2868a00 || character == 0xe2868b00) { return F_true; } - // Miscellaneous Technical: U+230C to U+2328. 
- if (character >= 0xe28c8c00 && character <= 0xe28ca800) { + // Arrows to Miscellaneous Technical: U+2190 to U+2328. + if (character >= 0xe2869000 && character <= 0xe28ca800) { return F_true; } @@ -532,13 +497,75 @@ extern "C" { } else if (macro_f_utf_char_t_to_char_1(character) == 0xe3) { + // CJK Symbols and Punctuation: U+3004, U+3012, U+3013, U+3020. + if (character == 0xe3808400 || character == 0xe3809200 || character == 0xe3809300 || character == 0xe380a000) { + return F_true; + } + + // CJK Symbols and Punctuation: U+3036, U+3037, U+303E, U+303F. + if (character == 0xe380b600 || character == 0xe380b700 || character == 0xe380be00 || character == 0xe380bf00) { + return F_true; + } + // Hiragana: U+309B, U+309C. if (character == 0xe3829b00 || character == 0xe3829c00) { return F_true; } + + // Kanbun: U+3190, U+3191. + if (character == 0xe3869000 || character == 0xe3869100) { + return F_true; + } + + // Kanbun to CJK Strokes: U+3196 to U+31E3. + if (character >= 0xe3869600 && character <= 0xe387a300) { + return F_true; + } + + // Enclosed CJK Letters and Months: U+3200 to U+321E. + if (character >= 0xe3888000 && character <= 0xe3889e00) { + return F_true; + } + + // Enclosed CJK Letters and Months: U+322A to U+3247. + if (character >= 0xe388aa00 && character <= 0xe3898700) { + return F_true; + } + + // Enclosed CJK Letters and Months: U+3250. + if (character == 0xe3899000) { + return F_true; + } + + // Enclosed CJK Letters and Months: U+3260 to U+327F. + if (character >= 0xe389a000 && character <= 0xe389bf00) { + return F_true; + } + + // Enclosed CJK Letters and Months: U+328A to U+32B0. + if (character >= 0xe38a8a00 && character <= 0xe38ab000) { + return F_true; + } + + // Enclosed CJK Letters and Months to CJK Compatibility: U+32C0 to U+33FF. + if (character >= 0xe38b8000 && character <= 0xe38fbf00) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_1(character) == 0xe4) { + + // Yijing Hexagram Symbols: U+4DC0 to U+4DFF. 
+ if (character >= 0xe4b78000 && character <= 0xe4b7bf00) { + return F_true; + } } else if (macro_f_utf_char_t_to_char_1(character) == 0xea) { + // Yi Radicals: U+A490 to U+A4C6. + if (character >= 0xea929000 && character <= 0xea938600) { + return F_true; + } + // Modifier Tone Letters: U+A700 to U+A716. if (character >= 0xea9c8000 && character <= 0xea9c9600) { return F_true; @@ -554,8 +581,18 @@ extern "C" { return F_true; } - // Common Indic Number Forms: U+A838. - if (character == 0xeaa0b800) { + // Syloti Nagri: U+A828 to U+A82B. + if (character >= 0xeaa0a800 && character <= 0xeaa0ab00) { + return F_true; + } + + // Common Indic Number Forms: U+A836 to U+A839. + if (character >= 0xeaa0b600 && character <= 0xeaa0b900) { + return F_true; + } + + // Myanmar Extended-A: U+AA77 to U+AA79. + if (character >= 0xeaa9b700 && character <= 0xeaa9b900) { return F_true; } @@ -576,8 +613,18 @@ extern "C" { return F_true; } - // Arabic Presentation Forms-A: U+FDFC. - if (character == 0xefb7bc00) { + // Arabic Presentation Forms-A: U+FD40 to U+FD4F. + if (character >= 0xefb58000 && character <= 0xefb58f00) { + return F_true; + } + + // Arabic Presentation Forms-A: U+FDCF. + if (character == 0xefb78f00) { + return F_true; + } + + // Arabic Presentation Forms-A: U+FDFC to U+FDFF. + if (character >= 0xefb7bc00 && character <= 0xefb7bf00) { return F_true; } @@ -606,18 +653,13 @@ extern "C" { return F_true; } - // Halfwidth and Fullwidth Forms: U+FF3E, U+FF40, . + // Halfwidth and Fullwidth Forms: U+FF3E, U+FF40. if (character == 0xefbcbe00 || character == 0xefbd8000) { return F_true; } - // Halfwidth and Fullwidth Forms: U+FFE0, U+FFE1, U+FFE3, U+FFE5. - if (character == 0xefbfa000 || character == 0xefbfa100 || character == 0xefbfa300 || character == 0xefbfa500) { - return F_true; - } - - // Halfwidth and Fullwidth Forms: U+FFE6. - if (character == 0xefbfa600) { + // Halfwidth and Fullwidth Forms: U+FFE0 to U+FFE6. 
+ if (character >= 0xefbfa000 && character <= 0xefbfa600) { return F_true; } @@ -631,7 +673,7 @@ extern "C" { return F_true; } - // Specials: U+FFFC to U+FFFD. + // Specials: U+FFFC, U+FFFD. if (character == 0xefbfbc00 || character == 0xefbfbd00) { return F_true; } @@ -642,10 +684,62 @@ extern "C" { if (macro_f_utf_char_t_to_char_1(character) == 0xf0) { - if (macro_f_utf_char_t_to_char_2(character) == 0x91) { + if (macro_f_utf_char_t_to_char_2(character) == 0x90) { + + // Aegean Numbers: U+10137 to U+1013F. + if (character >= 0xf09084b7 && character <= 0xf09084bf) { + return F_true; + } + + // Ancient Greek Numbers: U+10179 to U+10189. + if (character >= 0xf09085b9 && character <= 0xf0908689) { + return F_true; + } + + // Ancient Greek Numbers: U+1018C to U+1018E. + if (character >= 0xf090868c && character <= 0xf090868e) { + return F_true; + } + + // Ancient Symbols: U+10190 to U+1019C. + if (character >= 0xf0908690 && character <= 0xf090869c) { + return F_true; + } + + // Ancient Symbols: U+10190 to U+1019C. + if (character >= 0xf0908690 && character <= 0xf090869c) { + return F_true; + } + + // Ancient Symbols: U+101A0. + if (character == 0xf09086a0) { + return F_true; + } + + // Phaistos Disc: U+101D0 to U+101FC. + if (character >= 0xf0908790 && character <= 0xf09087bc) { + return F_true; + } - // Tamil Supplement: U+11FDD to U+11FE0. - if (character >= 0xf091bf9d && character <= 0xf091bfa0) { + // Palmyrene: U+10877, U+10878. + if (character == 0xf090a1b7 || character == 0xf090a1b8) { + return F_true; + } + + // Manichaean: U+10AC8. + if (character == 0xf090ab88) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x91) { + + // Ahom: U+1173F. + if (character == 0xf0919cbf) { + return F_true; + } + + // Tamil Supplement: U+11FD5 to U+11FF1. 
+ if (character >= 0xf091bf95 && character <= 0xf091bfb1) { return F_true; } @@ -664,22 +758,265 @@ extern "C" { return F_true; } } + else if (macro_f_utf_char_t_to_char_2(character) == 0x96) { + + // Pahawh Hmong: U+16B3C to U+16B3F. + if (character >= 0xf096acbc && character <= 0xf096acbf) { + return F_true; + } + + // Pahawh Hmong: U+16B45. + if (character == 0xf096ad85) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9b) { + + // Duployan: U+1BC9C. + if (character == 0xf09bb29c) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9c) { + + // Znamenny Musical Notation: U+1CF50 to U+1CFC3. + if (character >= 0xf09cbd90 && character <= 0xf09cbf83) { + return F_true; + } + } + else if (macro_f_utf_char_t_to_char_2(character) == 0x9d) { + + // Byzantine Musical Notation: U+1D000 to U+1D1EA. + if (character >= 0xf09d8080 && character <= 0xf09d87aa) { + return F_true; + } + + // Ancient Greek Musical Notation: U+1D200 (F0 9D 88 80) to U+1D245 (F0 9D 89 85). + if (character >= 0xf09d8880 && character <= 0xf09d8985) { + return F_true; + } + + // Tai Xuan Jing Symbols: U+1D300 to U+1D356. + if (character >= 0xf09d8c80 && character <= 0xf09d8d96) { + return F_true; + } + + // Mathematical Alphanumeric Symbols: U+1D6C1, U+1D6DB, U+1D6FB, U+1D715. + if (character == 0xf09d9b81 || character == 0xf09d9b9b || character == 0xf09d9bbb || character == 0xf09d9c95) { + return F_true; + } + + // Mathematical Alphanumeric Symbols: U+1D735, U+1D74F, U+1D76F, U+1D789. + if (character == 0xf09d9cb5 || character == 0xf09d9d8f || character == 0xf09d9daf || character == 0xf09d9e89) { + return F_true; + } + + // Mathematical Alphanumeric Symbols: U+1D7A9, U+1D7C3. + if (character == 0xf09d9ea9 || character == 0xf09d9f83) { + return F_true; + } + + // Sutton SignWriting: U+1D800 to U+1D9FF. + if (character >= 0xf09da080 && character <= 0xf09da7bf) { + return F_true; + } + + // Sutton SignWriting: U+1DA37 to U+1DA3A.
+ if (character >= 0xf09da8b7 && character <= 0xf09da8ba) { + return F_true; + } + + // Sutton SignWriting: U+1DA6D to U+1DA86. + if (character >= 0xf09da9ad && character <= 0xf09daa86) { + return F_true; + } + } else if (macro_f_utf_char_t_to_char_2(character) == 0x9e) { + // Nyiakeng Puachue Hmong: U+1E14F. + if (character == 0xf09e858f) { + return F_true; + } + // Wancho: U+1E2FF. if (character == 0xf09e8bbf) { return F_true; } - // Indic Siyaq Numbers: U+ECB0. - if (character == 0xf09eb2b0) { + // Indic Siyaq Numbers: U+1ECAC, U+1ECB0. + if (character == 0xf09eb2ac || character == 0xf09eb2b0) { + return F_true; + } + + // Ottoman Siyaq Numbers: U+1ED2E. + if (character == 0xf09eb4ae) { + return F_true; + } + + // Arabic Mathematical Alphabetic Symbols: U+1EEF0, U+1EEF1. + if (character == 0xf09ebbb0 || character == 0xf09ebbb1) { return F_true; } } else if (macro_f_utf_char_t_to_char_2(character) == 0x9f) { - // Miscellaneous Symbols and Pictographs: U+1F3FB to U+1F3FF. - if (character >= 0xf09f8fbb && character <= 0xf09f8fbf) { + // Mahjong Tiles to Domino Tiles: U+1F000 to U+1F093. + if (character >= 0xf09f8080 && character <= 0xf09f8293) { + return F_true; + } + + // Playing Cards: U+1F0A0 to U+1F0AE. + if (character >= 0xf09f82a0 && character <= 0xf09f82ae) { + return F_true; + } + + // Playing Cards: U+1F0B1 to U+1F0BF. + if (character >= 0xf09f82b1 && character <= 0xf09f82bf) { + return F_true; + } + + // Playing Cards: U+1F0C1 to U+1F0CF. + if (character >= 0xf09f8381 && character <= 0xf09f838f) { + return F_true; + } + + // Playing Cards: U+1F0D1 to U+1F0F5 (stops before the digit forms at U+1F100 to U+1F10C). + if (character >= 0xf09f8391 && character <= 0xf09f83b5) { + return F_true; + } + + // Enclosed Alphanumeric Supplement: U+1F10D to U+1F1AD. + if (character >= 0xf09f848d && character <= 0xf09f86ad) { + return F_true; + } + + // Enclosed Alphanumeric Supplement to Enclosed Ideographic Supplement: U+1F1E6 to U+1F202.
+ if (character >= 0xf09f87a6 && character <= 0xf09f8882) { + return F_true; + } + + // Enclosed Ideographic Supplement: U+1F210 to U+1F23B. + if (character >= 0xf09f8890 && character <= 0xf09f88bb) { + return F_true; + } + + // Enclosed Ideographic Supplement: U+1F240 to U+1F248. + if (character >= 0xf09f8980 && character <= 0xf09f8988) { + return F_true; + } + + // Enclosed Ideographic Supplement: U+1F250, U+1F251. + if (character == 0xf09f8990 || character == 0xf09f8991) { + return F_true; + } + + // Enclosed Ideographic Supplement: U+1F260 to U+1F265. + if (character >= 0xf09f89a0 && character <= 0xf09f89a5) { + return F_true; + } + + // Miscellaneous Symbols and Pictographs: U+1F300 to U+1F6EC. + if (character >= 0xf09f8c80 && character <= 0xf09f9bac) { + return F_true; + } + + // Transport and Map Symbols: U+1F6F0 to U+1F6FC. + if (character >= 0xf09f9bb0 && character <= 0xf09f9bbc) { + return F_true; + } + + // Alchemical Symbols: U+1F700 to U+1F773. + if (character >= 0xf09f9c80 && character <= 0xf09f9db3) { + return F_true; + } + + // Geometric Shapes Extended: U+1F780 to U+1F7D8. + if (character >= 0xf09f9e80 && character <= 0xf09f9f98) { + return F_true; + } + + // Geometric Shapes Extended: U+1F7E0 to U+1F7EB. + if (character >= 0xf09f9fa0 && character <= 0xf09f9fab) { + return F_true; + } + + // Geometric Shapes Extended: U+1F7F0. + if (character == 0xf09f9fb0) { + return F_true; + } + + // Supplemental Arrows-C: U+1F800 to U+1F80B. + if (character >= 0xf09fa080 && character <= 0xf09fa08b) { + return F_true; + } + + // Supplemental Arrows-C: U+1F810 to U+1F847. + if (character >= 0xf09fa090 && character <= 0xf09fa187) { + return F_true; + } + + // Supplemental Arrows-C: U+1F850 to U+1F887. + if (character >= 0xf09fa190 && character <= 0xf09fa287) { + return F_true; + } + + // Supplemental Arrows-C: U+1F890 to U+1F8AD. + if (character >= 0xf09fa290 && character <= 0xf09fa2ad) { + return F_true; + } + + // Supplemental Arrows-C: U+1F8B0, U+1F8B1.
+ if (character == 0xf09fa2b0 || character == 0xf09fa2b1) { + return F_true; + } + + // Supplemental Symbols and Pictographs to Chess Symbols: U+1F900 to U+1FA53. + if (character >= 0xf09fa480 && character <= 0xf09fa993) { + return F_true; + } + + // Chess Symbols: U+1FA60 to U+1FA6D. + if (character >= 0xf09fa9a0 && character <= 0xf09fa9ad) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA74. + if (character >= 0xf09fa9b0 && character <= 0xf09fa9b4) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FA78 to U+1FA7C. + if (character >= 0xf09fa9b8 && character <= 0xf09fa9bc) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA86. + if (character >= 0xf09faa80 && character <= 0xf09faa86) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FA90 to U+1FAAC. + if (character >= 0xf09faa90 && character <= 0xf09faaac) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAB0 to U+1FAC5. + if (character >= 0xf09faab0 && character <= 0xf09fab85) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAD0 to U+1FAE7. + if (character >= 0xf09fab90 && character <= 0xf09faba7) { + return F_true; + } + + // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF6. + if (character >= 0xf09fabb0 && character <= 0xf09fabb6) { + return F_true; + } + + // Symbols for Legacy Computing: U+1FB00 to U+1FBCA. + if (character >= 0xf09fac80 && character <= 0xf09faf8a) { return F_true; } } diff --git a/level_0/f_utf/c/private-utf_valid.c b/level_0/f_utf/c/private-utf_valid.c index 6331429..5404abb 100644 --- a/level_0/f_utf/c/private-utf_valid.c +++ b/level_0/f_utf/c/private-utf_valid.c @@ -9,6 +9,15 @@ extern "C" { #if !defined(_di_f_utf_character_is_valid_) || !defined(_di_f_utf_is_valid_) f_status_t private_f_utf_character_is_valid(const f_utf_char_t character) { + // All characters with data after the width bytes is invalid. 
+ if (macro_f_utf_char_t_width(character) == 2 && (macro_f_utf_char_t_to_char_3(character) || macro_f_utf_char_t_to_char_4(character))) { + return F_false; + } + + if (macro_f_utf_char_t_width(character) == 3 && macro_f_utf_char_t_to_char_4(character)) { + return F_false; + } + // Invalid: 11111xxx xxxxxxxx xxxxxxxx xxxxxxxx. if ((macro_f_utf_char_t_to_char_1(character) & 0b11111000) == 0b11111000) { return F_false; @@ -47,11 +56,11 @@ extern "C" { } // Valid: 0xxxxxxx ???????? ???????? ????????. - else if (!(macro_f_utf_char_t_to_char_1(character) & 0b10000000)) { - return F_true; + else if (macro_f_utf_char_t_to_char_1(character) & 0b10000000) { + return F_false; } - return F_false; + return F_true; } #endif // !defined(_di_f_utf_character_is_valid_) || !defined(_di_f_utf_is_valid_) diff --git a/level_0/f_utf/c/utf/common.h b/level_0/f_utf/c/utf/common.h index ff4634f..9af76f5 100644 --- a/level_0/f_utf/c/utf/common.h +++ b/level_0/f_utf/c/utf/common.h @@ -173,6 +173,9 @@ extern "C" { * The macro_f_utf_char_t_width macro determines a width of the UTF-8 character based on macro_f_utf_byte_width. * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when character is ASCII. * + * The macros that end in "_be" or "_le" represent "big endian" and "little endian". + * The default macros without the "_be" should be in "big endian" because the strings are always stored as if they were "big endian" without regard to the host byte order. 
+ * * @see f_utf_is_big_endian() */ #ifndef _di_f_utf_char_t_ @@ -182,67 +185,95 @@ extern "C" { #define macro_f_utf_char_t_initialize(code) code - #ifdef _is_F_endian_big - #define F_utf_char_mask_byte_1_d 0x000000ff // 1111 1111, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_mask_byte_2_d 0x0000ffff // 1111 1111, 1111 1111, 0000 0000, 0000 0000 - #define F_utf_char_mask_byte_3_d 0x00ffffff // 1111 1111, 1111 1111, 1111 1111, 0000 0000 - #define F_utf_char_mask_byte_4_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 - - #define F_utf_char_mask_char_1_d 0x000000ff // 1111 1111, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_mask_char_2_d 0x0000ff00 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 - #define F_utf_char_mask_char_3_d 0x00ff0000 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 - #define F_utf_char_mask_char_4_d 0xff000000 // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - - #define macro_f_utf_char_t_to_char_1(character) ((character) & F_utf_char_mask_char_1_d) // Grab first byte. - #define macro_f_utf_char_t_to_char_2(character) (((character) & F_utf_char_mask_char_2_d) << 8) // Grab second byte. - #define macro_f_utf_char_t_to_char_3(character) (((character) & F_utf_char_mask_char_3_d) << 16) // Grab third byte. - #define macro_f_utf_char_t_to_char_4(character) (((character) & F_utf_char_mask_char_4_d) << 24) // Grab fourth byte. - - #define macro_f_utf_char_t_from_char_1(character) ((character) & F_utf_char_mask_char_1_d) // Shift to first byte. - #define macro_f_utf_char_t_from_char_2(character) (((character) << 8) & F_utf_char_mask_char_2_d) // Shift to second byte. - #define macro_f_utf_char_t_from_char_3(character) (((character) << 16) & F_utf_char_mask_char_3_d) // Shift to third byte. - #define macro_f_utf_char_t_from_char_4(character) (((character) << 24) & F_utf_char_mask_char_4_d) // Shift to fourth byte. 
- #else - #define F_utf_char_mask_byte_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_mask_byte_2_d 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000 - #define F_utf_char_mask_byte_3_d 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000 - #define F_utf_char_mask_byte_4_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 - - #define F_utf_char_mask_char_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_mask_char_2_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 - #define F_utf_char_mask_char_3_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 - #define F_utf_char_mask_char_4_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - - #define macro_f_utf_char_t_to_char_1(character) (((character) & F_utf_char_mask_char_1_d) >> 24) // Grab first byte. - #define macro_f_utf_char_t_to_char_2(character) (((character) & F_utf_char_mask_char_2_d) >> 16) // Grab second byte. - #define macro_f_utf_char_t_to_char_3(character) (((character) & F_utf_char_mask_char_3_d) >> 8) // Grab third byte. - #define macro_f_utf_char_t_to_char_4(character) ((character) & F_utf_char_mask_char_4_d) // Grab fourth byte. - - #define macro_f_utf_char_t_from_char_1(character) (((character) << 24) & F_utf_char_mask_char_1_d) // Shift to first byte. - #define macro_f_utf_char_t_from_char_2(character) (((character) << 16) & F_utf_char_mask_char_2_d) // Shift to second byte. - #define macro_f_utf_char_t_from_char_3(character) (((character) << 8) & F_utf_char_mask_char_3_d) // Shift to third byte. - #define macro_f_utf_char_t_from_char_4(character) ((character) & F_utf_char_mask_char_4_d) // Shift to fourth byte. - #endif // _is_F_endian_big - - #define macro_f_utf_char_t_width(character) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1(character))) - #define macro_f_utf_char_t_width_is(character) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1(character))) + // Big Endian. 
+ #define F_utf_char_mask_byte_1_be_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_mask_byte_2_be_d 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000 + #define F_utf_char_mask_byte_3_be_d 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000 + #define F_utf_char_mask_byte_4_be_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 + + #define F_utf_char_mask_char_1_be_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_2_be_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_3_be_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 + #define F_utf_char_mask_char_4_be_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 + + #define macro_f_utf_char_t_to_char_1_be(character) (((character) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte. + #define macro_f_utf_char_t_to_char_2_be(character) (((character) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte. + #define macro_f_utf_char_t_to_char_3_be(character) (((character) & F_utf_char_mask_char_3_be_d) >> 8) // Grab third byte. + #define macro_f_utf_char_t_to_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Grab fourth byte. + + #define macro_f_utf_char_t_from_char_1_be(character) (((character) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte. + #define macro_f_utf_char_t_from_char_2_be(character) (((character) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte. + #define macro_f_utf_char_t_from_char_3_be(character) (((character) << 8) & F_utf_char_mask_char_3_be_d) // Shift to third byte. + #define macro_f_utf_char_t_from_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Shift to fourth byte. + + // Little Endian. 
+ #define F_utf_char_mask_byte_1_le_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 + #define F_utf_char_mask_byte_2_le_d 0x0000ffff // 0000 0000, 0000 0000, 1111 1111, 1111 1111 + #define F_utf_char_mask_byte_3_le_d 0x00ffffff // 0000 0000, 1111 1111, 1111 1111, 1111 1111 + #define F_utf_char_mask_byte_4_le_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 + + #define F_utf_char_mask_char_1_le_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 + #define F_utf_char_mask_char_2_le_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 + #define F_utf_char_mask_char_3_le_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_4_le_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + + #define macro_f_utf_char_t_to_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Grab first byte (already in the lowest 8 bits). + #define macro_f_utf_char_t_to_char_2_le(character) (((character) & F_utf_char_mask_char_2_le_d) >> 8) // Grab second byte, shifted down to the lowest 8 bits. + #define macro_f_utf_char_t_to_char_3_le(character) (((character) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte, shifted down to the lowest 8 bits. + #define macro_f_utf_char_t_to_char_4_le(character) (((character) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte, shifted down to the lowest 8 bits. + + #define macro_f_utf_char_t_from_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Shift to first byte. + #define macro_f_utf_char_t_from_char_2_le(character) (((character) << 8) & F_utf_char_mask_char_2_le_d) // Shift to second byte. + #define macro_f_utf_char_t_from_char_3_le(character) (((character) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte. + #define macro_f_utf_char_t_from_char_4_le(character) (((character) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte.
+ + #define F_utf_char_mask_byte_1_d F_utf_char_mask_byte_1_be_d + #define F_utf_char_mask_byte_2_d F_utf_char_mask_byte_2_be_d + #define F_utf_char_mask_byte_3_d F_utf_char_mask_byte_3_be_d + #define F_utf_char_mask_byte_4_d F_utf_char_mask_byte_4_be_d + + #define F_utf_char_mask_char_1_d F_utf_char_mask_char_1_be_d + #define F_utf_char_mask_char_2_d F_utf_char_mask_char_2_be_d + #define F_utf_char_mask_char_3_d F_utf_char_mask_char_3_be_d + #define F_utf_char_mask_char_4_d F_utf_char_mask_char_4_be_d + + #define macro_f_utf_char_t_to_char_1(character) macro_f_utf_char_t_to_char_1_be(character) + #define macro_f_utf_char_t_to_char_2(character) macro_f_utf_char_t_to_char_2_be(character) + #define macro_f_utf_char_t_to_char_3(character) macro_f_utf_char_t_to_char_3_be(character) + #define macro_f_utf_char_t_to_char_4(character) macro_f_utf_char_t_to_char_4_be(character) + + #define macro_f_utf_char_t_from_char_1(character) macro_f_utf_char_t_from_char_1_be(character) + #define macro_f_utf_char_t_from_char_2(character) macro_f_utf_char_t_from_char_2_be(character) + #define macro_f_utf_char_t_from_char_3(character) macro_f_utf_char_t_from_char_3_be(character) + #define macro_f_utf_char_t_from_char_4(character) macro_f_utf_char_t_from_char_4_be(character) + + #define macro_f_utf_char_t_width(character) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(character))) + #define macro_f_utf_char_t_width_is(character) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(character))) #endif // _di_f_utf_char_t_ /** * Provide f_utf_char_t special "characters". * * These strings are created via digits and then cast. + * + * The macros that end in "_be" or "_le" represent "big endian" and "little endian". + * The default macros without the "_be" should be in "big endian" because the strings are always stored as if they were "big endian" without regard to the host byte order. 
*/ #ifndef _di_f_utf_char_t_codes_ - #ifdef _is_F_endian_big - #define F_utf_char_t_eol_s 0x0000000a // 0000 0000, 0000 0000, 0000 0000, 0000 1010 - #define F_utf_char_t_eos_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_t_placeholder_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 - #else - #define F_utf_char_t_eol_s 0x0a000000 // 0000 1010, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_t_eos_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 - #define F_utf_char_t_placeholder_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 - #endif // _is_F_endian_big + + // Big Endian. + #define F_utf_char_t_eol_be_s 0x0a000000 // 0000 1010, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_t_eos_be_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_t_placeholder_be_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 + + // Little Endian. + #define F_utf_char_t_eol_le_s 0x0000000a // 0000 0000, 0000 0000, 0000 0000, 0000 1010 + #define F_utf_char_t_eos_le_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_t_placeholder_le_s 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 + + #define F_utf_char_t_eol_s F_utf_char_t_eol_be_s + #define F_utf_char_t_eos_s F_utf_char_t_eos_be_s + #define F_utf_char_t_placeholder_s F_utf_char_t_placeholder_be_s extern const f_utf_char_t f_utf_char_t_eol_s; extern const f_utf_char_t f_utf_char_t_eos_s; diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c index a543c0a..c0825b3 100644 --- a/level_0/f_utf/c/utf/is.c +++ b/level_0/f_utf/c/utf/is.c @@ -207,7 +207,11 @@ extern "C" { return private_f_utf_character_is_control(character_utf); } - return iscntrl(*character); + if (iscntrl(*character)) { + return F_true; + } + + return F_false; } #endif // _di_f_utf_is_control_ diff --git a/level_0/f_utf/c/utf/is_character.c b/level_0/f_utf/c/utf/is_character.c index f0a0330..5b78ffd 100644 --- 
a/level_0/f_utf/c/utf/is_character.c +++ b/level_0/f_utf/c/utf/is_character.c @@ -462,7 +462,12 @@ extern "C" { return private_f_utf_character_is_valid(character); } - return F_true; + // All characters with data after the width bytes is invalid. + if (macro_f_utf_char_t_from_char_1(macro_f_utf_char_t_to_char_1(character)) == character) { + return F_true; + } + + return F_false; } #endif // _di_f_utf_character_is_valid_ diff --git a/level_0/f_utf/c/utf/is_character.h b/level_0/f_utf/c/utf/is_character.h index dbc7263..08c71ce 100644 --- a/level_0/f_utf/c/utf/is_character.h +++ b/level_0/f_utf/c/utf/is_character.h @@ -157,6 +157,8 @@ extern "C" { * * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones. * + * This does not include Control format characters. + * * @param character * The character to validate. * diff --git a/level_0/f_utf/data/tests/bytesequences/combining-all.txt b/level_0/f_utf/data/tests/bytesequences/combining-all.txt index b42e082..085bbe3 100644 --- a/level_0/f_utf/data/tests/bytesequences/combining-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/combining-all.txt @@ -203,19 +203,12 @@ 55711 55728 56214 -56214 -56215 56215 56216 -56216 -56217 56217 56218 -56218 -56219 56219 56220 -56220 56223 56224 56225 @@ -230,59 +223,32 @@ 56237 56465 56496 -56496 -56497 56497 56498 -56498 -56499 56499 56500 -56500 -56501 56501 56502 -56502 56503 -56503 -56504 56504 56505 -56505 -56506 56506 56507 -56507 -56508 56508 56509 -56509 56510 -56510 -56511 56511 56704 -56704 56705 -56705 -56706 56706 56707 -56707 56708 -56708 -56709 56709 56710 -56710 -56711 56711 56712 -56712 -56713 56713 56714 -56714 56998 56999 57000 @@ -676,10 +642,8 @@ 14779021 14779037 4036012161 -4036012161 4036012162 4036012163 -4036012163 4036012165 4036012166 4036012172 @@ -746,11 +710,8 @@ 4036069053 4036069054 4036069258 -4036069258 -4036069259 4036069259 4036069260 -4036069260 4036069551 4036069552 4036069553 @@ -758,7 
+719,6 @@ 4036069558 4036069559 4036070303 -4036070303 4036070307 4036070308 4036070309 @@ -1255,14 +1215,9 @@ 14792633 14792634 14792635 -14792635 14792636 -14792636 -14792637 14792637 14792638 -14792638 -14792639 14792639 4036928400 4036928401 @@ -1309,7 +1264,6 @@ 14909612 14909613 14910105 -14910105 14910106 15374767 15374768 @@ -1360,509 +1314,14 @@ 15375006 15375007 15375280 -15375280 -15375281 15375281 -15375282 -15375283 -15375284 -15375285 -15375286 -15375287 -15375288 -15375289 -15375290 -15375291 -15375292 -15375293 -15375294 -15375295 -15375488 -15375489 -15375490 -15375491 -15375492 -15375493 -15375494 -15375495 -15375496 -15375497 -15375498 -15375499 -15375500 -15375501 -15375502 -15375503 -15375504 -15375505 -15375506 -15375507 -15375508 -15375509 -15375510 -15375511 -15375512 -15375513 -15375514 -15375515 -15375516 -15375517 -15375518 -15375519 -15375520 -15375521 -15375522 -15375523 -15375524 -15375525 -15375526 -15375527 -15375528 -15375529 -15375530 -15375531 -15375532 -15375533 -15375534 -15375535 -15375536 -15375537 -15375538 -15375539 -15375540 -15375541 -15375542 -15375543 -15375544 -15375545 -15375546 -15375547 -15375548 -15375549 -15375550 -15375551 -15375744 -15375745 -15375746 -15375747 -15375748 -15375749 -15375750 -15375751 -15375752 -15375753 -15375754 -15375755 -15375756 -15375757 -15375758 -15375759 -15375760 -15375761 -15375762 -15375763 -15375764 -15375765 -15375766 -15375767 -15375768 -15375769 -15375770 -15375771 -15375772 -15375773 -15375774 -15375775 -15375776 -15375777 -15375778 -15375779 -15375780 -15375781 -15375782 -15375783 -15375784 -15375785 -15375786 -15375787 -15375788 -15375789 -15375790 -15375791 -15375792 -15375793 -15375794 -15375795 -15375796 -15375797 -15375798 -15375799 -15375800 -15375801 -15375802 -15375803 -15375804 -15375805 -15375806 -15375807 -15376000 -15376001 -15376002 -15376003 -15376004 -15376005 -15376006 -15376007 -15376008 -15376009 -15376010 -15376011 -15376012 -15376013 -15376014 -15376015 
-15376016 -15376017 -15376018 -15376019 -15376020 -15376021 -15376022 -15376023 -15376024 -15376025 -15376026 -15376027 -15376028 -15376029 -15376030 -15376031 -15376032 -15376033 -15376034 -15376035 -15376036 -15376037 -15376038 -15376039 -15376040 -15376041 -15376042 -15376043 -15376044 -15376045 -15376046 -15376047 -15376048 -15376049 -15376050 -15376051 -15376052 -15376053 -15376054 -15376055 -15376056 -15376057 -15376058 -15376059 -15376060 -15376061 -15376062 -15376063 -15376256 -15376257 -15376258 -15376259 -15376260 -15376261 -15376262 -15376263 -15376264 -15376265 -15376266 -15376267 -15376268 -15376269 -15376270 -15376271 -15376272 -15376273 -15376274 -15376275 -15376276 -15376277 -15376278 -15376279 -15376280 -15376281 -15376282 -15376283 -15376284 -15376285 -15376286 -15376287 -15376288 -15376289 -15376290 -15376291 -15376292 -15376293 -15376294 -15376295 -15376296 -15376297 -15376298 -15376299 -15376300 -15376301 -15376302 -15376303 -15376304 -15376305 -15376306 -15376307 -15376308 -15376309 -15376310 -15376311 -15376312 -15376313 -15376314 -15376315 -15376316 -15376317 -15376318 -15376319 -15376512 -15376513 15376514 -15376514 -15376515 -15376516 -15376517 -15376518 15376518 -15376519 -15376520 -15376521 -15376522 -15376523 15376523 -15376524 -15376525 -15376526 -15376527 -15376528 -15376529 -15376530 -15376531 -15376532 -15376533 -15376534 -15376535 -15376536 -15376537 -15376538 -15376539 -15376540 -15376541 -15376542 -15376543 -15376544 -15376545 -15376546 -15376547 -15376548 15376549 -15376549 -15376550 15376550 -15376551 -15376552 -15376553 -15376554 -15376555 -15376556 -15376557 -15376558 -15376559 -15376560 -15376561 -15376562 -15376563 -15376564 -15376565 -15376566 -15376567 -15376568 -15376569 -15376570 -15376571 -15376572 -15376573 -15376574 -15376575 -15376768 -15376769 -15376770 -15376771 -15376772 -15376773 -15376774 -15376775 -15376776 -15376777 -15376778 -15376779 -15376780 -15376781 -15376782 -15376783 -15376784 -15376785 -15376786 
-15376787 -15376788 -15376789 -15376790 -15376791 -15376792 -15376793 -15376794 -15376795 -15376796 -15376797 -15376798 -15376799 -15376800 -15376801 -15376802 -15376803 -15376804 -15376805 -15376806 -15376807 -15376808 -15376809 -15376810 -15376811 -15376812 -15376813 -15376814 -15376815 -15376816 -15376817 -15376818 -15376819 -15376820 -15376821 -15376822 -15376823 -15376824 -15376825 -15376826 -15376827 -15376828 -15376829 -15376830 -15376831 -15377024 -15377025 -15377026 -15377027 -15377028 -15377029 -15377030 -15377031 -15377032 -15377033 -15377034 -15377035 -15377036 -15377037 -15377038 -15377039 -15377040 -15377041 -15377042 -15377043 -15377044 -15377045 -15377046 -15377047 -15377048 -15377049 -15377050 -15377051 -15377052 -15377053 -15377054 -15377055 -15377056 -15377057 -15377058 -15377059 -15377060 -15377061 -15377062 -15377063 -15377064 -15377065 -15377066 -15377067 -15377068 -15377069 -15377070 -15377071 -15377072 -15377073 -15377074 -15377075 -15377076 -15377077 -15377078 -15377079 -15377080 -15377081 -15377082 -15377083 -15377084 -15377085 -15377086 -15377087 -15377280 -15377281 -15377282 -15377283 -15377284 15377284 15377285 -15377286 -15377287 -15377288 -15377289 -15377290 -15377291 -15377292 -15377293 -15377294 -15377295 -15377296 -15377297 -15377298 -15377299 -15377300 -15377301 -15377302 -15377303 -15377304 -15377305 -15377306 -15377307 -15377308 -15377309 -15377310 -15377311 15377312 15377313 15377314 diff --git a/level_0/f_utf/data/tests/bytesequences/emoji-all.txt b/level_0/f_utf/data/tests/bytesequences/emoji-all.txt index 51137ee..2c40578 100644 --- a/level_0/f_utf/data/tests/bytesequences/emoji-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/emoji-all.txt @@ -4,7 +4,6 @@ 4036985743 4036986288 4036986289 -4036986302 4036986303 4036986510 4036986513 @@ -17,16 +16,6 @@ 4036986520 4036986521 4036986522 -4036986790 -4036986791 -4036986792 -4036986793 -4036986794 -4036986795 -4036986796 -4036986797 -4036986798 -4036986799 4036987009 
4036987010 4036987034 @@ -76,8 +65,6 @@ 4036988063 4036988064 4036988065 -4036988066 -4036988067 4036988068 4036988069 4036988070 @@ -190,16 +177,11 @@ 4036988561 4036988562 4036988563 -4036988564 -4036988565 4036988566 4036988567 -4036988568 4036988569 4036988570 4036988571 -4036988572 -4036988573 4036988574 4036988575 4036988576 @@ -283,12 +265,9 @@ 4036988846 4036988847 4036988848 -4036988849 -4036988850 4036988851 4036988852 4036988853 -4036988854 4036988855 4036988856 4036988857 @@ -552,7 +531,6 @@ 4036989883 4036989884 4036989885 -4036989886 4036989887 4036990080 4036990081 @@ -616,24 +594,12 @@ 4036990139 4036990140 4036990141 -4036990142 -4036990143 -4036990336 -4036990337 -4036990338 -4036990339 -4036990340 -4036990341 -4036990342 -4036990343 -4036990344 4036990345 4036990346 4036990347 4036990348 4036990349 4036990350 -4036990351 4036990352 4036990353 4036990354 @@ -658,17 +624,8 @@ 4036990373 4036990374 4036990375 -4036990376 -4036990377 -4036990378 -4036990379 -4036990380 -4036990381 -4036990382 4036990383 4036990384 -4036990385 -4036990386 4036990387 4036990388 4036990389 @@ -677,133 +634,34 @@ 4036990392 4036990393 4036990394 -4036990395 -4036990396 -4036990397 -4036990398 -4036990399 -4036990592 -4036990593 -4036990594 -4036990595 -4036990596 -4036990597 -4036990598 4036990599 -4036990600 -4036990601 4036990602 4036990603 4036990604 4036990605 -4036990606 -4036990607 4036990608 -4036990609 -4036990610 -4036990611 -4036990612 4036990613 4036990614 -4036990615 -4036990616 -4036990617 -4036990618 -4036990619 -4036990620 -4036990621 -4036990622 -4036990623 -4036990624 -4036990625 -4036990626 -4036990627 4036990628 4036990629 -4036990630 -4036990631 4036990632 -4036990633 -4036990634 -4036990635 -4036990636 -4036990637 -4036990638 -4036990639 -4036990640 4036990641 4036990642 -4036990643 -4036990644 -4036990645 -4036990646 -4036990647 -4036990648 -4036990649 -4036990650 -4036990651 4036990652 -4036990653 -4036990654 -4036990655 -4036990848 -4036990849 
4036990850 4036990851 4036990852 -4036990853 -4036990854 -4036990855 -4036990856 -4036990857 -4036990858 -4036990859 -4036990860 -4036990861 -4036990862 -4036990863 -4036990864 4036990865 4036990866 4036990867 -4036990868 -4036990869 -4036990870 -4036990871 -4036990872 -4036990873 -4036990874 -4036990875 4036990876 4036990877 4036990878 -4036990879 -4036990880 4036990881 -4036990882 4036990883 -4036990884 -4036990885 -4036990886 -4036990887 4036990888 -4036990889 -4036990890 -4036990891 -4036990892 -4036990893 -4036990894 4036990895 -4036990896 -4036990897 -4036990898 4036990899 -4036990900 -4036990901 -4036990902 -4036990903 -4036990904 -4036990905 4036990906 4036990907 4036990908 @@ -960,11 +818,6 @@ 4036991875 4036991876 4036991877 -4036991878 -4036991879 -4036991880 -4036991881 -4036991882 4036991883 4036991884 4036991885 @@ -973,16 +826,9 @@ 4036991888 4036991889 4036991890 -4036991891 -4036991892 4036991893 4036991894 4036991895 -4036991896 -4036991897 -4036991898 -4036991899 -4036991900 4036991901 4036991902 4036991903 @@ -992,19 +838,10 @@ 4036991907 4036991908 4036991909 -4036991910 -4036991911 -4036991912 4036991913 -4036991914 4036991915 4036991916 -4036991917 -4036991918 -4036991919 4036991920 -4036991921 -4036991922 4036991923 4036991924 4036991925 @@ -1015,9 +852,6 @@ 4036991930 4036991931 4036991932 -4036991933 -4036991934 -4036991935 4036992928 4036992929 4036992930 @@ -1031,18 +865,6 @@ 4036992938 4036992939 4036992944 -4036994176 -4036994177 -4036994178 -4036994179 -4036994180 -4036994181 -4036994182 -4036994183 -4036994184 -4036994185 -4036994186 -4036994187 4036994188 4036994189 4036994190 @@ -1090,7 +912,6 @@ 4036994232 4036994233 4036994234 -4036994235 4036994236 4036994237 4036994238 @@ -1101,7 +922,6 @@ 4036994435 4036994436 4036994437 -4036994438 4036994439 4036994440 4036994441 @@ -1375,122 +1195,17 @@ 4036996020 4036996021 4036996022 -14844296 14844297 14845090 14845113 -14845584 -14845585 -14845586 -14845587 14845588 14845589 14845590 
14845591 14845592 14845593 -14845594 -14845595 -14845596 -14845597 -14845598 -14845599 -14845600 -14845601 -14845602 -14845603 -14845604 -14845605 -14845606 -14845607 -14845608 14845609 14845610 -14845611 -14845612 -14845613 -14845614 -14845615 -14845616 -14845617 -14845618 -14845619 -14845620 -14845621 -14845622 -14845623 -14845624 -14845625 -14845626 -14845627 -14845628 -14845629 -14845630 -14845631 -14845824 -14845825 -14845826 -14845827 -14845828 -14845829 -14845830 -14845831 -14845832 -14845833 -14845834 -14845835 -14845836 -14845837 -14845838 -14845839 -14845840 -14845841 -14845842 -14845843 -14845844 -14845845 -14845846 -14845847 -14845848 -14845849 -14845850 -14845851 -14845852 -14845853 -14845854 -14845855 -14845856 -14845857 -14845858 -14845859 -14845860 -14845861 -14845862 -14845863 -14845864 -14845865 -14845866 -14845867 -14845868 -14845869 -14845870 -14845871 -14845872 -14845873 -14845874 -14845875 -14845876 -14845877 -14845878 -14845879 -14845880 -14845881 -14845882 -14845883 -14845884 -14845885 -14845886 -14845887 14847130 14847131 14847144 @@ -1522,73 +1237,24 @@ 14850178 14850179 14850180 -14850181 -14850182 -14850183 -14850184 -14850185 -14850186 -14850187 -14850188 -14850189 14850190 -14850191 -14850192 14850193 -14850194 -14850195 14850196 14850197 -14850198 -14850199 14850200 -14850201 -14850202 -14850203 -14850204 14850205 -14850206 -14850207 14850208 -14850209 14850210 14850211 -14850212 -14850213 14850214 -14850215 -14850216 -14850217 14850218 -14850219 -14850220 -14850221 14850222 14850223 -14850224 -14850225 -14850226 -14850227 -14850228 -14850229 -14850230 -14850231 14850232 14850233 14850234 -14850235 -14850236 -14850237 -14850238 -14850239 14850432 -14850433 14850434 -14850435 -14850436 -14850437 -14850438 -14850439 14850440 14850441 14850442 @@ -1601,369 +1267,86 @@ 14850449 14850450 14850451 -14850452 -14850453 -14850454 -14850455 -14850456 -14850457 -14850458 -14850459 -14850460 -14850461 -14850462 14850463 14850464 -14850465 
-14850466 14850467 -14850468 14850469 14850470 -14850471 14850472 -14850473 -14850474 -14850475 -14850476 -14850477 -14850478 -14850479 -14850480 -14850481 -14850482 -14850483 -14850484 -14850485 -14850486 -14850487 -14850488 -14850489 -14850490 14850491 -14850492 -14850493 14850494 14850495 -14850688 -14850689 -14850690 -14850691 -14850692 -14850693 -14850694 -14850695 -14850696 -14850697 -14850698 -14850699 -14850700 -14850701 -14850702 -14850703 -14850704 -14850705 14850706 14850707 14850708 14850709 14850710 14850711 -14850712 14850713 -14850714 14850715 14850716 -14850717 -14850718 -14850719 14850720 14850721 -14850722 -14850723 -14850724 -14850725 -14850726 14850727 -14850728 -14850729 14850730 14850731 -14850732 -14850733 -14850734 -14850735 14850736 14850737 -14850738 -14850739 -14850740 -14850741 -14850742 -14850743 -14850744 -14850745 -14850746 -14850747 -14850748 14850749 14850750 -14850751 -14850944 -14850945 -14850946 -14850947 14850948 14850949 -14850950 -14850951 14850952 -14850953 -14850954 -14850955 -14850956 -14850957 14850958 14850959 -14850960 14850961 -14850962 14850963 14850964 -14850965 -14850966 -14850967 -14850968 -14850969 -14850970 -14850971 -14850972 -14850973 -14850974 -14850975 -14850976 -14850977 -14850978 -14850979 -14850980 -14850981 -14850982 -14850983 -14850984 14850985 14850986 -14850987 -14850988 -14850989 -14850990 -14850991 14850992 14850993 14850994 14850995 14850996 14850997 -14850998 14850999 14851000 14851001 14851002 -14851003 -14851004 14851005 -14851006 -14851007 -14851200 -14851201 14851202 -14851203 -14851204 14851205 -14851206 -14851207 14851208 14851209 14851210 14851211 14851212 14851213 -14851214 14851215 -14851216 -14851217 14851218 -14851219 14851220 -14851221 14851222 -14851223 -14851224 -14851225 -14851226 -14851227 -14851228 14851229 -14851230 -14851231 -14851232 14851233 -14851234 -14851235 -14851236 -14851237 -14851238 -14851239 14851240 -14851241 -14851242 -14851243 -14851244 -14851245 -14851246 -14851247 
-14851248 -14851249 -14851250 14851251 14851252 -14851253 -14851254 -14851255 -14851256 -14851257 -14851258 -14851259 -14851260 -14851261 -14851262 -14851263 -14851456 -14851457 -14851458 -14851459 14851460 -14851461 -14851462 14851463 -14851464 -14851465 -14851466 -14851467 14851468 -14851469 14851470 -14851471 -14851472 -14851473 -14851474 14851475 14851476 14851477 -14851478 14851479 -14851480 -14851481 -14851482 -14851483 -14851484 -14851485 -14851486 -14851487 -14851488 -14851489 -14851490 14851491 14851492 -14851493 -14851494 -14851495 -14851496 -14851497 -14851498 -14851499 -14851500 -14851501 -14851502 -14851503 -14851504 -14851505 -14851506 -14851507 -14851508 -14851509 -14851510 -14851511 -14851512 -14851513 -14851514 -14851515 -14851516 -14851517 -14851518 -14851519 -14851712 -14851713 -14851714 -14851715 -14851716 -14851717 -14851718 -14851719 -14851720 -14851721 -14851722 -14851723 -14851724 -14851725 -14851726 -14851727 -14851728 -14851729 -14851730 -14851731 -14851732 14851733 14851734 14851735 -14851736 -14851737 -14851738 -14851739 -14851740 -14851741 -14851742 -14851743 -14851744 14851745 -14851746 -14851747 -14851748 -14851749 -14851750 -14851751 -14851752 -14851753 -14851754 -14851755 -14851756 -14851757 -14851758 -14851759 14851760 -14851761 -14851762 -14851763 -14851764 -14851765 -14851766 -14851767 -14851768 -14851769 -14851770 -14851771 -14851772 -14851773 -14851774 14851775 14853300 14853301 diff --git a/level_0/f_utf/data/tests/bytesequences/superscript-all.txt b/level_0/f_utf/data/tests/bytesequences/superscript-all.txt index b4606e8..d2e8f5f 100644 --- a/level_0/f_utf/data/tests/bytesequences/superscript-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/superscript-all.txt @@ -1,38 +1,276 @@ -14792098 -14792099 -14792100 -14792101 -14792102 -14792103 -14792104 -14792105 -14792106 -14844544 -14844545 -14844546 -14844547 -14844548 -14844549 -14844550 -14844551 -14844552 -14844553 -14844554 -14844555 -14844556 -14844557 -14844558 
-14844560 -14844561 -14844562 -14844563 -14844564 -14844565 -14844566 -14844567 -14844568 -14844569 -14844570 -14844571 -14844572 -14856636 +49832 +49834 +49842 +49843 +49849 +49850 +51888 +51889 +51890 +51891 +51892 +51893 +51894 +51895 +51896 +52128 +52129 +52130 +52131 +52132 +4036009600 +4036009601 +4036009602 +4036009603 +4036009604 +4036009605 +4036009607 +4036009608 +4036009609 +4036009610 +4036009611 +4036009612 +4036009613 +4036009614 +4036009615 +4036009616 +4036009617 +4036009618 +4036009619 +4036009620 +4036009621 +4036009622 +4036009623 +4036009624 +4036009625 +4036009626 +4036009627 +4036009628 +4036009629 +4036009630 +4036009631 +4036009632 +4036009633 +4036009634 +4036009635 +4036009636 +4036009637 +4036009638 +4036009639 +4036009640 +4036009641 +4036009642 +4036009643 +4036009644 +4036009645 +4036009646 +4036009647 +4036009648 +4036009650 +4036009651 +4036009652 +4036009653 +4036009654 +4036009655 +4036009656 +4036009657 +4036009658 +14779324 +14782620 +14782621 +14782622 +14782623 +14782624 +14782625 +14782626 +14782627 +14782628 +14782629 +14782630 +14782631 +14782632 +14782633 +14782634 +14782857 +14782858 +14782859 +14783107 +14783108 +14783137 +14783138 +14783163 +14783164 +14783165 +14783166 +14783376 +14783377 +14783378 +14783402 +14783403 +14783621 +14783622 +14783623 +14783624 +14783625 +14783626 +14783627 +14783653 +14783678 +14783679 +14783872 +14783873 +14783888 +14783889 +14783901 +14783914 +14783919 +14783931 +14783933 +14784133 +14784149 +14784150 +14784159 +14784166 +14784174 +14784430 +14784641 +14784902 +14784903 +14784922 +14784958 +14784959 +14791852 +14791853 +14791854 +14791855 +14791856 +14791857 +14791858 +14791859 +14791860 +14791861 +14791862 +14791863 +14791864 +14791865 +14791866 +14791867 +14791868 +14791869 +14791870 +14791871 +14792064 +14792065 +14792066 +14792067 +14792068 +14792069 +14792070 +14792071 +14792072 +14792073 +14792074 +14792075 +14792076 +14792077 +14792078 +14792079 +14792080 +14792081 +14792082 
+14792083 +14792084 +14792085 +14792086 +14792087 +14792088 +14792089 +14792090 +14792091 +14792092 +14792093 +14792094 +14792095 +14792096 +14792097 +14792120 +14792347 +14792348 +14792349 +14792350 +14792351 +14792352 +14792353 +14792354 +14792355 +14792356 +14792357 +14792358 +14792359 +14792360 +14792361 +14792362 +14792363 +14792364 +14792365 +14792366 +14792367 +14792368 +14792369 +14792370 +14792371 +14792372 +14792373 +14792374 +14792375 +14792376 +14792377 +14792378 +14792379 +14792380 +14792381 +14792382 +14792383 +14844336 +14844337 +14844340 +14844341 +14844342 +14844343 +14844344 +14844345 +14844346 +14844347 +14844348 +14844349 +14844350 +14844351 +14856637 +14857647 +14911121 +14911122 +14911123 +14911124 +14911125 +14911126 +14911127 +14911128 +14911129 +14911130 +14911131 +14911132 +14911133 +14911134 +14911135 +15375004 +15375005 +15375792 +15376306 +15376307 +15376308 +15376312 +15376313 +15379868 +15379869 +15379870 +15379871 diff --git a/level_0/f_utf/data/tests/codepoints/combining-all.txt b/level_0/f_utf/data/tests/codepoints/combining-all.txt index 2a16393..5be5993 100644 --- a/level_0/f_utf/data/tests/codepoints/combining-all.txt +++ b/level_0/f_utf/data/tests/codepoints/combining-all.txt @@ -203,19 +203,12 @@ U+065E U+065F U+0670 U+06D6 -U+06D6 -U+06D7 U+06D7 U+06D8 -U+06D8 -U+06D9 U+06D9 U+06DA -U+06DA -U+06DB U+06DB U+06DC -U+06DC U+06DF U+06E0 U+06E1 @@ -230,59 +223,32 @@ U+06EC U+06ED U+0711 U+0730 -U+0730 -U+0731 U+0731 U+0732 -U+0732 -U+0733 U+0733 U+0734 -U+0734 -U+0735 U+0735 U+0736 -U+0736 U+0737 -U+0737 -U+0738 U+0738 U+0739 -U+0739 -U+073A U+073A U+073B -U+073B -U+073C U+073C U+073D -U+073D U+073E -U+073E -U+073F U+073F U+0740 -U+0740 U+0741 -U+0741 -U+0742 U+0742 U+0743 -U+0743 U+0744 -U+0744 -U+0745 U+0745 U+0746 -U+0746 -U+0747 U+0747 U+0748 -U+0748 -U+0749 U+0749 U+074A -U+074A U+07A6 U+07A7 U+07A8 @@ -676,10 +642,8 @@ U+1086 U+108D U+109D U+10A01 -U+10A01 U+10A02 U+10A03 -U+10A03 U+10A05 U+10A06 U+10A0C @@ -746,11 +710,8 @@ 
U+111BC U+111BD U+111BE U+111CA -U+111CA -U+111CB U+111CB U+111CC -U+111CC U+1122F U+11230 U+11231 @@ -758,7 +719,6 @@ U+11234 U+11236 U+11237 U+112DF -U+112DF U+112E3 U+112E4 U+112E5 @@ -1255,14 +1215,9 @@ U+1DF8 U+1DF9 U+1DFA U+1DFB -U+1DFB U+1DFC -U+1DFC -U+1DFD U+1DFD U+1DFE -U+1DFE -U+1DFF U+1DFF U+1E8D0 U+1E8D1 @@ -1309,7 +1264,6 @@ U+302B U+302C U+302D U+3099 -U+3099 U+309A U+A66F U+A670 @@ -1360,509 +1314,14 @@ U+A69D U+A69E U+A69F U+A6F0 -U+A6F0 -U+A6F1 U+A6F1 -U+A6F2 -U+A6F3 -U+A6F4 -U+A6F5 -U+A6F6 -U+A6F7 -U+A6F8 -U+A6F9 -U+A6FA -U+A6FB -U+A6FC -U+A6FD -U+A6FE -U+A6FF -U+A700 -U+A701 -U+A702 -U+A703 -U+A704 -U+A705 -U+A706 -U+A707 -U+A708 -U+A709 -U+A70A -U+A70B -U+A70C -U+A70D -U+A70E -U+A70F -U+A710 -U+A711 -U+A712 -U+A713 -U+A714 -U+A715 -U+A716 -U+A717 -U+A718 -U+A719 -U+A71A -U+A71B -U+A71C -U+A71D -U+A71E -U+A71F -U+A720 -U+A721 -U+A722 -U+A723 -U+A724 -U+A725 -U+A726 -U+A727 -U+A728 -U+A729 -U+A72A -U+A72B -U+A72C -U+A72D -U+A72E -U+A72F -U+A730 -U+A731 -U+A732 -U+A733 -U+A734 -U+A735 -U+A736 -U+A737 -U+A738 -U+A739 -U+A73A -U+A73B -U+A73C -U+A73D -U+A73E -U+A73F -U+A740 -U+A741 -U+A742 -U+A743 -U+A744 -U+A745 -U+A746 -U+A747 -U+A748 -U+A749 -U+A74A -U+A74B -U+A74C -U+A74D -U+A74E -U+A74F -U+A750 -U+A751 -U+A752 -U+A753 -U+A754 -U+A755 -U+A756 -U+A757 -U+A758 -U+A759 -U+A75A -U+A75B -U+A75C -U+A75D -U+A75E -U+A75F -U+A760 -U+A761 -U+A762 -U+A763 -U+A764 -U+A765 -U+A766 -U+A767 -U+A768 -U+A769 -U+A76A -U+A76B -U+A76C -U+A76D -U+A76E -U+A76F -U+A770 -U+A771 -U+A772 -U+A773 -U+A774 -U+A775 -U+A776 -U+A777 -U+A778 -U+A779 -U+A77A -U+A77B -U+A77C -U+A77D -U+A77E -U+A77F -U+A780 -U+A781 -U+A782 -U+A783 -U+A784 -U+A785 -U+A786 -U+A787 -U+A788 -U+A789 -U+A78A -U+A78B -U+A78C -U+A78D -U+A78E -U+A78F -U+A790 -U+A791 -U+A792 -U+A793 -U+A794 -U+A795 -U+A796 -U+A797 -U+A798 -U+A799 -U+A79A -U+A79B -U+A79C -U+A79D -U+A79E -U+A79F -U+A7A0 -U+A7A1 -U+A7A2 -U+A7A3 -U+A7A4 -U+A7A5 -U+A7A6 -U+A7A7 -U+A7A8 -U+A7A9 -U+A7AA -U+A7AB -U+A7AC -U+A7AD -U+A7AE -U+A7AF 
-U+A7B0 -U+A7B1 -U+A7B2 -U+A7B3 -U+A7B4 -U+A7B5 -U+A7B6 -U+A7B7 -U+A7B8 -U+A7B9 -U+A7BA -U+A7BB -U+A7BC -U+A7BD -U+A7BE -U+A7BF -U+A7C0 -U+A7C1 -U+A7C2 -U+A7C3 -U+A7C4 -U+A7C5 -U+A7C6 -U+A7C7 -U+A7C8 -U+A7C9 -U+A7CA -U+A7CB -U+A7CC -U+A7CD -U+A7CE -U+A7CF -U+A7D0 -U+A7D1 -U+A7D2 -U+A7D3 -U+A7D4 -U+A7D5 -U+A7D6 -U+A7D7 -U+A7D8 -U+A7D9 -U+A7DA -U+A7DB -U+A7DC -U+A7DD -U+A7DE -U+A7DF -U+A7E0 -U+A7E1 -U+A7E2 -U+A7E3 -U+A7E4 -U+A7E5 -U+A7E6 -U+A7E7 -U+A7E8 -U+A7E9 -U+A7EA -U+A7EB -U+A7EC -U+A7ED -U+A7EE -U+A7EF -U+A7F0 -U+A7F1 -U+A7F2 -U+A7F3 -U+A7F4 -U+A7F5 -U+A7F6 -U+A7F7 -U+A7F8 -U+A7F9 -U+A7FA -U+A7FB -U+A7FC -U+A7FD -U+A7FE -U+A7FF -U+A800 -U+A801 U+A802 -U+A802 -U+A803 -U+A804 -U+A805 -U+A806 U+A806 -U+A807 -U+A808 -U+A809 -U+A80A -U+A80B U+A80B -U+A80C -U+A80D -U+A80E -U+A80F -U+A810 -U+A811 -U+A812 -U+A813 -U+A814 -U+A815 -U+A816 -U+A817 -U+A818 -U+A819 -U+A81A -U+A81B -U+A81C -U+A81D -U+A81E -U+A81F -U+A820 -U+A821 -U+A822 -U+A823 -U+A824 U+A825 -U+A825 -U+A826 U+A826 -U+A827 -U+A828 -U+A829 -U+A82A -U+A82B -U+A82C -U+A82D -U+A82E -U+A82F -U+A830 -U+A831 -U+A832 -U+A833 -U+A834 -U+A835 -U+A836 -U+A837 -U+A838 -U+A839 -U+A83A -U+A83B -U+A83C -U+A83D -U+A83E -U+A83F -U+A840 -U+A841 -U+A842 -U+A843 -U+A844 -U+A845 -U+A846 -U+A847 -U+A848 -U+A849 -U+A84A -U+A84B -U+A84C -U+A84D -U+A84E -U+A84F -U+A850 -U+A851 -U+A852 -U+A853 -U+A854 -U+A855 -U+A856 -U+A857 -U+A858 -U+A859 -U+A85A -U+A85B -U+A85C -U+A85D -U+A85E -U+A85F -U+A860 -U+A861 -U+A862 -U+A863 -U+A864 -U+A865 -U+A866 -U+A867 -U+A868 -U+A869 -U+A86A -U+A86B -U+A86C -U+A86D -U+A86E -U+A86F -U+A870 -U+A871 -U+A872 -U+A873 -U+A874 -U+A875 -U+A876 -U+A877 -U+A878 -U+A879 -U+A87A -U+A87B -U+A87C -U+A87D -U+A87E -U+A87F -U+A880 -U+A881 -U+A882 -U+A883 -U+A884 -U+A885 -U+A886 -U+A887 -U+A888 -U+A889 -U+A88A -U+A88B -U+A88C -U+A88D -U+A88E -U+A88F -U+A890 -U+A891 -U+A892 -U+A893 -U+A894 -U+A895 -U+A896 -U+A897 -U+A898 -U+A899 -U+A89A -U+A89B -U+A89C -U+A89D -U+A89E -U+A89F -U+A8A0 -U+A8A1 -U+A8A2 -U+A8A3 -U+A8A4 
-U+A8A5 -U+A8A6 -U+A8A7 -U+A8A8 -U+A8A9 -U+A8AA -U+A8AB -U+A8AC -U+A8AD -U+A8AE -U+A8AF -U+A8B0 -U+A8B1 -U+A8B2 -U+A8B3 -U+A8B4 -U+A8B5 -U+A8B6 -U+A8B7 -U+A8B8 -U+A8B9 -U+A8BA -U+A8BB -U+A8BC -U+A8BD -U+A8BE -U+A8BF -U+A8C0 -U+A8C1 -U+A8C2 -U+A8C3 -U+A8C4 U+A8C4 U+A8C5 -U+A8C6 -U+A8C7 -U+A8C8 -U+A8C9 -U+A8CA -U+A8CB -U+A8CC -U+A8CD -U+A8CE -U+A8CF -U+A8D0 -U+A8D1 -U+A8D2 -U+A8D3 -U+A8D4 -U+A8D5 -U+A8D6 -U+A8D7 -U+A8D8 -U+A8D9 -U+A8DA -U+A8DB -U+A8DC -U+A8DD -U+A8DE -U+A8DF U+A8E0 U+A8E1 U+A8E2 diff --git a/level_0/f_utf/data/tests/codepoints/emoji-all.txt b/level_0/f_utf/data/tests/codepoints/emoji-all.txt index 53ce47b..3a0a325 100644 --- a/level_0/f_utf/data/tests/codepoints/emoji-all.txt +++ b/level_0/f_utf/data/tests/codepoints/emoji-all.txt @@ -4,7 +4,6 @@ U+1F004 U+1F0CF U+1F170 U+1F171 -U+1F17E U+1F17F U+1F18E U+1F191 @@ -17,16 +16,6 @@ U+1F197 U+1F198 U+1F199 U+1F19A -U+1F1E6 -U+1F1E7 -U+1F1E8 -U+1F1E9 -U+1F1EA -U+1F1EB -U+1F1EC -U+1F1ED -U+1F1EE -U+1F1EF U+1F201 U+1F202 U+1F21A @@ -76,8 +65,6 @@ U+1F31E U+1F31F U+1F320 U+1F321 -U+1F322 -U+1F323 U+1F324 U+1F325 U+1F326 @@ -190,16 +177,11 @@ U+1F390 U+1F391 U+1F392 U+1F393 -U+1F394 -U+1F395 U+1F396 U+1F397 -U+1F398 U+1F399 U+1F39A U+1F39B -U+1F39C -U+1F39D U+1F39E U+1F39F U+1F3A0 @@ -283,12 +265,9 @@ U+1F3ED U+1F3EE U+1F3EF U+1F3F0 -U+1F3F1 -U+1F3F2 U+1F3F3 U+1F3F4 U+1F3F5 -U+1F3F6 U+1F3F7 U+1F3F8 U+1F3F9 @@ -552,7 +531,6 @@ U+1F4FA U+1F4FB U+1F4FC U+1F4FD -U+1F4FE U+1F4FF U+1F500 U+1F501 @@ -616,24 +594,12 @@ U+1F53A U+1F53B U+1F53C U+1F53D -U+1F53E -U+1F53F -U+1F540 -U+1F541 -U+1F542 -U+1F543 -U+1F544 -U+1F545 -U+1F546 -U+1F547 -U+1F548 U+1F549 U+1F54A U+1F54B U+1F54C U+1F54D U+1F54E -U+1F54F U+1F550 U+1F551 U+1F552 @@ -658,17 +624,8 @@ U+1F564 U+1F565 U+1F566 U+1F567 -U+1F568 -U+1F569 -U+1F56A -U+1F56B -U+1F56C -U+1F56D -U+1F56E U+1F56F U+1F570 -U+1F571 -U+1F572 U+1F573 U+1F574 U+1F575 @@ -677,133 +634,34 @@ U+1F577 U+1F578 U+1F579 U+1F57A -U+1F57B -U+1F57C -U+1F57D -U+1F57E -U+1F57F -U+1F580 -U+1F581 
-U+1F582 -U+1F583 -U+1F584 -U+1F585 -U+1F586 U+1F587 -U+1F588 -U+1F589 U+1F58A U+1F58B U+1F58C U+1F58D -U+1F58E -U+1F58F U+1F590 -U+1F591 -U+1F592 -U+1F593 -U+1F594 U+1F595 U+1F596 -U+1F597 -U+1F598 -U+1F599 -U+1F59A -U+1F59B -U+1F59C -U+1F59D -U+1F59E -U+1F59F -U+1F5A0 -U+1F5A1 -U+1F5A2 -U+1F5A3 U+1F5A4 U+1F5A5 -U+1F5A6 -U+1F5A7 U+1F5A8 -U+1F5A9 -U+1F5AA -U+1F5AB -U+1F5AC -U+1F5AD -U+1F5AE -U+1F5AF -U+1F5B0 U+1F5B1 U+1F5B2 -U+1F5B3 -U+1F5B4 -U+1F5B5 -U+1F5B6 -U+1F5B7 -U+1F5B8 -U+1F5B9 -U+1F5BA -U+1F5BB U+1F5BC -U+1F5BD -U+1F5BE -U+1F5BF -U+1F5C0 -U+1F5C1 U+1F5C2 U+1F5C3 U+1F5C4 -U+1F5C5 -U+1F5C6 -U+1F5C7 -U+1F5C8 -U+1F5C9 -U+1F5CA -U+1F5CB -U+1F5CC -U+1F5CD -U+1F5CE -U+1F5CF -U+1F5D0 U+1F5D1 U+1F5D2 U+1F5D3 -U+1F5D4 -U+1F5D5 -U+1F5D6 -U+1F5D7 -U+1F5D8 -U+1F5D9 -U+1F5DA -U+1F5DB U+1F5DC U+1F5DD U+1F5DE -U+1F5DF -U+1F5E0 U+1F5E1 -U+1F5E2 U+1F5E3 -U+1F5E4 -U+1F5E5 -U+1F5E6 -U+1F5E7 U+1F5E8 -U+1F5E9 -U+1F5EA -U+1F5EB -U+1F5EC -U+1F5ED -U+1F5EE U+1F5EF -U+1F5F0 -U+1F5F1 -U+1F5F2 U+1F5F3 -U+1F5F4 -U+1F5F5 -U+1F5F6 -U+1F5F7 -U+1F5F8 -U+1F5F9 U+1F5FA U+1F5FB U+1F5FC @@ -960,11 +818,6 @@ U+1F6C2 U+1F6C3 U+1F6C4 U+1F6C5 -U+1F6C6 -U+1F6C7 -U+1F6C8 -U+1F6C9 -U+1F6CA U+1F6CB U+1F6CC U+1F6CD @@ -973,16 +826,9 @@ U+1F6CF U+1F6D0 U+1F6D1 U+1F6D2 -U+1F6D3 -U+1F6D4 U+1F6D5 U+1F6D6 U+1F6D7 -U+1F6D8 -U+1F6D9 -U+1F6DA -U+1F6DB -U+1F6DC U+1F6DD U+1F6DE U+1F6DF @@ -992,19 +838,10 @@ U+1F6E2 U+1F6E3 U+1F6E4 U+1F6E5 -U+1F6E6 -U+1F6E7 -U+1F6E8 U+1F6E9 -U+1F6EA U+1F6EB U+1F6EC -U+1F6ED -U+1F6EE -U+1F6EF U+1F6F0 -U+1F6F1 -U+1F6F2 U+1F6F3 U+1F6F4 U+1F6F5 @@ -1015,9 +852,6 @@ U+1F6F9 U+1F6FA U+1F6FB U+1F6FC -U+1F6FD -U+1F6FE -U+1F6FF U+1F7E0 U+1F7E1 U+1F7E2 @@ -1031,18 +865,6 @@ U+1F7E9 U+1F7EA U+1F7EB U+1F7F0 -U+1F900 -U+1F901 -U+1F902 -U+1F903 -U+1F904 -U+1F905 -U+1F906 -U+1F907 -U+1F908 -U+1F909 -U+1F90A -U+1F90B U+1F90C U+1F90D U+1F90E @@ -1090,7 +912,6 @@ U+1F937 U+1F938 U+1F939 U+1F93A -U+1F93B U+1F93C U+1F93D U+1F93E @@ -1101,7 +922,6 @@ U+1F942 U+1F943 U+1F944 U+1F945 -U+1F946 U+1F947 
U+1F948 U+1F949 @@ -1375,122 +1195,17 @@ U+1FAF3 U+1FAF4 U+1FAF5 U+1FAF6 -U+2048 U+2049 U+2122 U+2139 -U+2190 -U+2191 -U+2192 -U+2193 U+2194 U+2195 U+2196 U+2197 U+2198 U+2199 -U+219A -U+219B -U+219C -U+219D -U+219E -U+219F -U+21A0 -U+21A1 -U+21A2 -U+21A3 -U+21A4 -U+21A5 -U+21A6 -U+21A7 -U+21A8 U+21A9 U+21AA -U+21AB -U+21AC -U+21AD -U+21AE -U+21AF -U+21B0 -U+21B1 -U+21B2 -U+21B3 -U+21B4 -U+21B5 -U+21B6 -U+21B7 -U+21B8 -U+21B9 -U+21BA -U+21BB -U+21BC -U+21BD -U+21BE -U+21BF -U+21C0 -U+21C1 -U+21C2 -U+21C3 -U+21C4 -U+21C5 -U+21C6 -U+21C7 -U+21C8 -U+21C9 -U+21CA -U+21CB -U+21CC -U+21CD -U+21CE -U+21CF -U+21D0 -U+21D1 -U+21D2 -U+21D3 -U+21D4 -U+21D5 -U+21D6 -U+21D7 -U+21D8 -U+21D9 -U+21DA -U+21DB -U+21DC -U+21DD -U+21DE -U+21DF -U+21E0 -U+21E1 -U+21E2 -U+21E3 -U+21E4 -U+21E5 -U+21E6 -U+21E7 -U+21E8 -U+21E9 -U+21EA -U+21EB -U+21EC -U+21ED -U+21EE -U+21EF -U+21F0 -U+21F1 -U+21F2 -U+21F3 -U+21F4 -U+21F5 -U+21F6 -U+21F7 -U+21F8 -U+21F9 -U+21FA -U+21FB -U+21FC -U+21FD -U+21FE -U+21FF U+231A U+231B U+2328 @@ -1522,73 +1237,24 @@ U+2601 U+2602 U+2603 U+2604 -U+2605 -U+2606 -U+2607 -U+2608 -U+2609 -U+260A -U+260B -U+260C -U+260D U+260E -U+260F -U+2610 U+2611 -U+2612 -U+2613 U+2614 U+2615 -U+2616 -U+2617 U+2618 -U+2619 -U+261A -U+261B -U+261C U+261D -U+261E -U+261F U+2620 -U+2621 U+2622 U+2623 -U+2624 -U+2625 U+2626 -U+2627 -U+2628 -U+2629 U+262A -U+262B -U+262C -U+262D U+262E U+262F -U+2630 -U+2631 -U+2632 -U+2633 -U+2634 -U+2635 -U+2636 -U+2637 U+2638 U+2639 U+263A -U+263B -U+263C -U+263D -U+263E -U+263F U+2640 -U+2641 U+2642 -U+2643 -U+2644 -U+2645 -U+2646 -U+2647 U+2648 U+2649 U+264A @@ -1601,369 +1267,86 @@ U+2650 U+2651 U+2652 U+2653 -U+2654 -U+2655 -U+2656 -U+2657 -U+2658 -U+2659 -U+265A -U+265B -U+265C -U+265D -U+265E U+265F U+2660 -U+2661 -U+2662 U+2663 -U+2664 U+2665 U+2666 -U+2667 U+2668 -U+2669 -U+266A -U+266B -U+266C -U+266D -U+266E -U+266F -U+2670 -U+2671 -U+2672 -U+2673 -U+2674 -U+2675 -U+2676 -U+2677 -U+2678 -U+2679 -U+267A U+267B -U+267C -U+267D U+267E U+267F 
-U+2680 -U+2681 -U+2682 -U+2683 -U+2684 -U+2685 -U+2686 -U+2687 -U+2688 -U+2689 -U+268A -U+268B -U+268C -U+268D -U+268E -U+268F -U+2690 -U+2691 U+2692 U+2693 U+2694 U+2695 U+2696 U+2697 -U+2698 U+2699 -U+269A U+269B U+269C -U+269D -U+269E -U+269F U+26A0 U+26A1 -U+26A2 -U+26A3 -U+26A4 -U+26A5 -U+26A6 U+26A7 -U+26A8 -U+26A9 U+26AA U+26AB -U+26AC -U+26AD -U+26AE -U+26AF U+26B0 U+26B1 -U+26B2 -U+26B3 -U+26B4 -U+26B5 -U+26B6 -U+26B7 -U+26B8 -U+26B9 -U+26BA -U+26BB -U+26BC U+26BD U+26BE -U+26BF -U+26C0 -U+26C1 -U+26C2 -U+26C3 U+26C4 U+26C5 -U+26C6 -U+26C7 U+26C8 -U+26C9 -U+26CA -U+26CB -U+26CC -U+26CD U+26CE U+26CF -U+26D0 U+26D1 -U+26D2 U+26D3 U+26D4 -U+26D5 -U+26D6 -U+26D7 -U+26D8 -U+26D9 -U+26DA -U+26DB -U+26DC -U+26DD -U+26DE -U+26DF -U+26E0 -U+26E1 -U+26E2 -U+26E3 -U+26E4 -U+26E5 -U+26E6 -U+26E7 -U+26E8 U+26E9 U+26EA -U+26EB -U+26EC -U+26ED -U+26EE -U+26EF U+26F0 U+26F1 U+26F2 U+26F3 U+26F4 U+26F5 -U+26F6 U+26F7 U+26F8 U+26F9 U+26FA -U+26FB -U+26FC U+26FD -U+26FE -U+26FF -U+2700 -U+2701 U+2702 -U+2703 -U+2704 U+2705 -U+2706 -U+2707 U+2708 U+2709 U+270A U+270B U+270C U+270D -U+270E U+270F -U+2710 -U+2711 U+2712 -U+2713 U+2714 -U+2715 U+2716 -U+2717 -U+2718 -U+2719 -U+271A -U+271B -U+271C U+271D -U+271E -U+271F -U+2720 U+2721 -U+2722 -U+2723 -U+2724 -U+2725 -U+2726 -U+2727 U+2728 -U+2729 -U+272A -U+272B -U+272C -U+272D -U+272E -U+272F -U+2730 -U+2731 -U+2732 U+2733 U+2734 -U+2735 -U+2736 -U+2737 -U+2738 -U+2739 -U+273A -U+273B -U+273C -U+273D -U+273E -U+273F -U+2740 -U+2741 -U+2742 -U+2743 U+2744 -U+2745 -U+2746 U+2747 -U+2748 -U+2749 -U+274A -U+274B U+274C -U+274D U+274E -U+274F -U+2750 -U+2751 -U+2752 U+2753 U+2754 U+2755 -U+2756 U+2757 -U+2758 -U+2759 -U+275A -U+275B -U+275C -U+275D -U+275E -U+275F -U+2760 -U+2761 -U+2762 U+2763 U+2764 -U+2765 -U+2766 -U+2767 -U+2768 -U+2769 -U+276A -U+276B -U+276C -U+276D -U+276E -U+276F -U+2770 -U+2771 -U+2772 -U+2773 -U+2774 -U+2775 -U+2776 -U+2777 -U+2778 -U+2779 -U+277A -U+277B -U+277C -U+277D -U+277E -U+277F -U+2780 -U+2781 
-U+2782 -U+2783 -U+2784 -U+2785 -U+2786 -U+2787 -U+2788 -U+2789 -U+278A -U+278B -U+278C -U+278D -U+278E -U+278F -U+2790 -U+2791 -U+2792 -U+2793 -U+2794 U+2795 U+2796 U+2797 -U+2798 -U+2799 -U+279A -U+279B -U+279C -U+279D -U+279E -U+279F -U+27A0 U+27A1 -U+27A2 -U+27A3 -U+27A4 -U+27A5 -U+27A6 -U+27A7 -U+27A8 -U+27A9 -U+27AA -U+27AB -U+27AC -U+27AD -U+27AE -U+27AF U+27B0 -U+27B1 -U+27B2 -U+27B3 -U+27B4 -U+27B5 -U+27B6 -U+27B7 -U+27B8 -U+27B9 -U+27BA -U+27BB -U+27BC -U+27BD -U+27BE U+27BF U+2934 U+2935 diff --git a/level_0/f_utf/tests/unit/c/data-utf.c b/level_0/f_utf/tests/unit/c/data-utf.c index 3205e93..193dfef 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.c +++ b/level_0/f_utf/tests/unit/c/data-utf.c @@ -94,20 +94,19 @@ FILE *data__bytesequence_file_open__zero_width(void) { return fopen("./data/tests/bytesequences/zero_width-all.txt", "r"); } -uint8_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character) { +ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character) { - size_t length = 32; + size_t length = 0; char *line = 0; - const int8_t bytes = (int8_t) getline(&line, &length, file); - if (bytes == -1) return 0; + const ssize_t bytes = getline(&line, &length, file); - if (!bytes) { + if (bytes == -1 || !bytes) { if (line) { free(line); } - return 0; + return bytes; } const long long number = atoll(line); @@ -116,13 +115,23 @@ uint8_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const char free(line); } - if (!number) return -1; + if (!number && bytes != 2 && line[0] != '0') return -1; - *character = (f_utf_char_t) number; - - // Network order is always big-endian so take advantage of this for converting little endian integers. 
#ifdef _is_F_endian_little - *character = htonl(*character); + if ((F_utf_char_mask_byte_1_le_d & number) == number) { + *character = (f_utf_char_t) ((F_utf_char_mask_byte_1_le_d & number) << 24); + } + else if ((F_utf_char_mask_byte_2_le_d & number) == number) { + *character = (f_utf_char_t) ((F_utf_char_mask_byte_2_le_d & number) << 16); + } + else if ((F_utf_char_mask_byte_3_le_d & number) == number) { + *character = (f_utf_char_t) ((F_utf_char_mask_byte_3_le_d & number) << 8); + } + else if ((F_utf_char_mask_byte_4_le_d & number) == number) { + *character = (f_utf_char_t) ((F_utf_char_mask_byte_4_le_d & number)); + } + #else + *character = (f_utf_char_t) number; #endif // _is_F_endian_little return bytes; diff --git a/level_0/f_utf/tests/unit/c/data-utf.h b/level_0/f_utf/tests/unit/c/data-utf.h index 9b1b185..0fc4b0f 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.h +++ b/level_0/f_utf/tests/unit/c/data-utf.h @@ -311,13 +311,13 @@ extern FILE *data__bytesequence_file_open__zero_width(void); * @return * positive number on success where number represents bytes read. * 0 on success and end of file is reached. - * 0 on failure. + * -1 on failure. 
* * @see atoll() * @see getline() * @see htonl() */ -extern uint8_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character); +extern ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character); #ifdef __cplusplus } // extern "C" diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c index 5e6751b..dca994a 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_alphabetic__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_alphabetic(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_combining.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_combining.c index 1312e6f..fed1e1f 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_combining.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_combining.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_combining__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_combining(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_control.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_control.c index e40e1aa..0b3dc6c 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_control.c +++ 
b/level_0/f_utf/tests/unit/c/test-utf-character_is_control.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_control__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_control(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_digit.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_digit.c index 507bb32..52d7882 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_digit.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_digit.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_digit__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_digit(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_emoji.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_emoji.c index 1ddbe2a..478d744 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_emoji.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_emoji.c @@ -13,17 +13,16 @@ void test__f_utf_character_is_emoji__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_emoji(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_numeric.c 
b/level_0/f_utf/tests/unit/c/test-utf-character_is_numeric.c index dce0932..397e328 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_numeric.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_numeric.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_numeric__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_numeric(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_phonetic.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_phonetic.c index 6c2724c..f9e26ea 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_phonetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_phonetic.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_phonetic__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_phonetic(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_private.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_private.c index bf176cc..3cb4b9e 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_private.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_private.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_private__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = 
f_utf_character_is_private(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_punctuation.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_punctuation.c index b0bea4c..b4c642c 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_punctuation.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_punctuation.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_punctuation__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_punctuation(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_subscript.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_subscript.c index 7fe409b..de1a1cb 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_subscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_subscript.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_subscript__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_subscript(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_superscript.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_superscript.c index 0757a47..23f3926 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_superscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_superscript.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_superscript__works(void **state) { 
assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_superscript(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_surrogate.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_surrogate.c index 8a8d6a7..e7bf61a 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_surrogate.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_surrogate.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_surrogate__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_surrogate(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_symbol.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_symbol.c index 9eccad6..ee7c8a9 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_symbol.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_symbol.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_symbol__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_symbol(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c index b2b426f..06b0058 100644 --- 
a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c @@ -7,59 +7,76 @@ extern "C" { void test__f_utf_character_is_valid__works(void **state) { - for (uint32_t character = 0; character < UINT32_MAX; ++character) { + for (f_utf_char_t character = 0; character < UINT32_MAX; ++character) { const uint8_t first = macro_f_utf_char_t_to_char_1(character); const uint8_t second = macro_f_utf_char_t_to_char_2(character); const uint8_t third = macro_f_utf_char_t_to_char_3(character); const uint8_t fourth = macro_f_utf_char_t_to_char_4(character); + const uint8_t width = macro_f_utf_char_t_width_is(character); const f_status_t status = f_utf_character_is_valid(character); - // Valid: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. - if ((first & 0b11111000) == 0b11111000) { - assert_false(status); + // All remaining bytes after width must be zero for valid character. + if (width == 0 && (second || third || fourth)) { + assert_int_equal(status, F_false); } - else if ((first & 0b11111000) == 0b11110000) { - if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000 && (fourth & 0b11000000) == 0b10000000) { - assert_true(status); - } - else { - assert_false(status); - } + else if (width == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else if (width == 2 && (third || fourth)) { + assert_int_equal(status, F_false); } + else if (width == 3 && fourth) { + assert_int_equal(status, F_false); + } + else { - // Valid: 1110xxxx 10xxxxxx 10xxxxxx 00000000. - else if ((first & 0b11110000) == 0b11100000) { - if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000) { - assert_true(status); + // Valid: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. 
+ if ((first & 0b11111000) == 0b11111000) { + assert_int_equal(status, F_false); } - else { - assert_false(status); + else if ((first & 0b11111000) == 0b11110000) { + if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000 && (fourth & 0b11000000) == 0b10000000) { + assert_int_equal(status, F_true); + } + else { + assert_int_equal(status, F_false); + } } - } - // Valid: 110xxxxx 10xxxxxx 00000000 00000000. - else if ((first & 0b11100000) == 0b11000000) { - if ((second & 0b11000000) == 0b10000000) { - assert_true(status); + // Valid: 1110xxxx 10xxxxxx 10xxxxxx 00000000. + else if ((first & 0b11110000) == 0b11100000) { + if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000) { + assert_int_equal(status, F_true); + } + else { + assert_int_equal(status, F_false); + } } - else { - assert_false(status); + + // Valid: 110xxxxx 10xxxxxx 00000000 00000000. + else if ((first & 0b11100000) == 0b11000000) { + if ((second & 0b11000000) == 0b10000000) { + assert_int_equal(status, F_true); + } + else { + assert_int_equal(status, F_false); + } } - } - // Invalid (UTF Fragment): 10xxxxxx 00000000 00000000 00000000. - else if ((first & 0b11000000) == 0b10000000) { - assert_int_equal(status, F_status_set_error(F_utf_fragment)); - } + // Invalid (UTF Fragment): 10xxxxxx 00000000 00000000 00000000. + else if ((first & 0b11000000) == 0b10000000) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } - // Valid: 0xxxxxxx 00000000 00000000 00000000. - else if (!(first & 0b10000000)) { - assert_true(status); - } - else { - assert_false(status); + // Valid: 0xxxxxxx 00000000 00000000 00000000. 
+ else if (first & 0b10000000) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } } } // for } diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_whitespace.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_whitespace.c index 8df3e02..546c1b1 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_whitespace.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_whitespace.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_whitespace__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_whitespace(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_wide.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_wide.c index aa06c90..ca9905b 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_wide.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_wide.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_wide__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_wide(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c index 3b07e18..941cc6b 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c @@ -13,18 +13,18 @@ void test__f_utf_character_is_word__strict_is_false(void **state) { assert_non_null(file); f_utf_char_t character = 0; - 
int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_word(character, F_false); // @todo provide an array of codes that should return false when not strict. - assert_true(status); + assert_int_equal(status, F_true); } ++line; @@ -43,17 +43,17 @@ void test__f_utf_character_is_word__strict_is_true(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_word(character, F_true); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_zero_width.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_zero_width.c index 11c477a..d6a3917 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_zero_width.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_zero_width.c @@ -13,17 +13,17 @@ void test__f_utf_character_is_zero_width__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; do { bytes = data__bytesequence_get_line(file, &character); - if (bytes) { + if (bytes > 0) { const f_status_t status = f_utf_character_is_zero_width(character); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c index 656be86..1e00c6b 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c @@ -13,7 +13,7 @@ void test__f_utf_is_alphabetic__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; 
f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_alphabetic__works(void **state) { const f_status_t status = f_utf_is_alphabetic(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c index 180b2bf..bd782f6 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c @@ -13,7 +13,7 @@ void test__f_utf_is_combining__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_combining__works(void **state) { const f_status_t status = f_utf_is_combining(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_control.c b/level_0/f_utf/tests/unit/c/test-utf-is_control.c index fbdb61b..34624c2 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_control.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_control.c @@ -13,7 +13,7 @@ void test__f_utf_is_control__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_control__works(void **state) { const f_status_t status = f_utf_is_control(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c index fb3a48f..83e8568 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c @@ -13,7 +13,7 @@ void test__f_utf_is_digit__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_digit__works(void 
**state) { const f_status_t status = f_utf_is_digit(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c index a2d26f3..d477440 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c @@ -13,7 +13,7 @@ void test__f_utf_is_emoji__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_emoji__works(void **state) { const f_status_t status = f_utf_is_emoji(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c index e0bd4dc..90c5b6c 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c @@ -13,7 +13,7 @@ void test__f_utf_is_numeric__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_numeric__works(void **state) { const f_status_t status = f_utf_is_numeric(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c index e3bd806..ddeb0ac 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c @@ -13,7 +13,7 @@ void test__f_utf_is_phonetic__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_phonetic__works(void **state) { const f_status_t status = f_utf_is_phonetic(buffer, 5); - assert_true(status); + 
assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_private.c b/level_0/f_utf/tests/unit/c/test-utf-is_private.c index 9c0d07f..e3bc846 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_private.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_private.c @@ -13,7 +13,7 @@ void test__f_utf_is_private__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_private__works(void **state) { const f_status_t status = f_utf_is_private(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c index f407498..5275c0b 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c @@ -13,7 +13,7 @@ void test__f_utf_is_punctuation__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_punctuation__works(void **state) { const f_status_t status = f_utf_is_punctuation(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c index 0bf3ac1..aa5965f 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c @@ -13,7 +13,7 @@ void test__f_utf_is_subscript__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_subscript__works(void **state) { const f_status_t status = f_utf_is_subscript(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git 
a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c index 1c7e418..cc15d2a 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c @@ -13,7 +13,7 @@ void test__f_utf_is_superscript__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_superscript__works(void **state) { const f_status_t status = f_utf_is_superscript(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_surrogate.c b/level_0/f_utf/tests/unit/c/test-utf-is_surrogate.c index 4807e12..403a662 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_surrogate.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_surrogate.c @@ -13,7 +13,7 @@ void test__f_utf_is_surrogate__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_surrogate__works(void **state) { const f_status_t status = f_utf_is_surrogate(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c index e4dd674..8986f10 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c @@ -13,7 +13,7 @@ void test__f_utf_is_symbol__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_symbol__works(void **state) { const f_status_t status = f_utf_is_symbol(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_valid.c 
b/level_0/f_utf/tests/unit/c/test-utf-is_valid.c index 9e12eec..32857d9 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_valid.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_valid.c @@ -21,34 +21,34 @@ void test__f_utf_is_valid__works(void **state) { // Valid: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. if ((first & 0b11111000) == 0b11111000) { - assert_false(status); + assert_int_equal(status, F_false); } else if ((first & 0b11111000) == 0b11110000) { if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000 && (fourth & 0b11000000) == 0b10000000) { - assert_true(status); + assert_int_equal(status, F_true); } else { - assert_false(status); + assert_int_equal(status, F_false); } } // Valid: 1110xxxx 10xxxxxx 10xxxxxx 00000000. else if ((first & 0b11110000) == 0b11100000) { if ((second & 0b11000000) == 0b10000000 && (third & 0b11000000) == 0b10000000) { - assert_true(status); + assert_int_equal(status, F_true); } else { - assert_false(status); + assert_int_equal(status, F_false); } } // Valid: 110xxxxx 10xxxxxx 00000000 00000000. else if ((first & 0b11100000) == 0b11000000) { if ((second & 0b11000000) == 0b10000000) { - assert_true(status); + assert_int_equal(status, F_true); } else { - assert_false(status); + assert_int_equal(status, F_false); } } @@ -59,10 +59,10 @@ void test__f_utf_is_valid__works(void **state) { // Valid: 0xxxxxxx 00000000 00000000 00000000. 
else if (!(first & 0b10000000)) { - assert_true(status); + assert_int_equal(status, F_true); } else { - assert_false(status); + assert_int_equal(status, F_false); } } // for } diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c index 5b4dc30..66e70d9 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c @@ -13,7 +13,7 @@ void test__f_utf_is_whitespace__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_whitespace__works(void **state) { const f_status_t status = f_utf_is_whitespace(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c index f9882ef..36ca32e 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c @@ -13,7 +13,7 @@ void test__f_utf_is_wide__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_wide__works(void **state) { const f_status_t status = f_utf_is_wide(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word.c b/level_0/f_utf/tests/unit/c/test-utf-is_word.c index b572408..bfdbfeb 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_word.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word.c @@ -13,7 +13,7 @@ void test__f_utf_is_word__strict_is_false(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -41,7 +41,7 @@ void test__f_utf_is_word__strict_is_false(void **state) { const f_status_t status = 
f_utf_is_word(buffer, F_false, 5); // @todo provide an array of codes that should return false when not strict. - assert_true(status); + assert_int_equal(status, F_true); } ++line; @@ -60,7 +60,7 @@ void test__f_utf_is_word__strict_is_true(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -87,7 +87,7 @@ void test__f_utf_is_word__strict_is_true(void **state) { const f_status_t status = f_utf_is_word(buffer, F_true, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c index 6087717..83195a3 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c @@ -13,7 +13,7 @@ void test__f_utf_is_zero_width__works(void **state) { assert_non_null(file); f_utf_char_t character = 0; - int8_t bytes = 0; + ssize_t bytes = 0; f_array_length_t line = 0; @@ -40,7 +40,7 @@ void test__f_utf_is_zero_width__works(void **state) { const f_status_t status = f_utf_is_zero_width(buffer, 5); - assert_true(status); + assert_int_equal(status, F_true); } ++line; -- 1.8.3.1