From: Kevin Day Date: Sat, 25 Jun 2022 04:13:46 +0000 (-0500) Subject: Update: Implement more f_utf unit tests. X-Git-Tag: 0.5.10~19 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=61e7433091a940f71b03875046ddb9602d032187;p=fll Update: Implement more f_utf unit tests. Only the is white space tests are yet to be implemented. --- diff --git a/level_0/f_utf/data/build/settings-tests b/level_0/f_utf/data/build/settings-tests index e1b5557..62e6dca 100644 --- a/level_0/f_utf/data/build/settings-tests +++ b/level_0/f_utf/data/build/settings-tests @@ -27,6 +27,8 @@ build_libraries-individual -lf_memory -lf_string -lf_utf build_sources_program test-utf-append.c test-utf-append_assure.c test-utf-append_assure_nulless.c test-utf-append_nulless.c build_sources_program test-utf-character_is_alphabetic.c test-utf-is_alphabetic.c +build_sources_program test-utf-character_is_alphabetic_digit.c test-utf-is_alphabetic_digit.c +build_sources_program test-utf-character_is_alphabetic_numeric.c test-utf-is_alphabetic_numeric.c build_sources_program test-utf-character_is_combining.c test-utf-is_combining.c build_sources_program test-utf-character_is_control.c test-utf-is_control.c build_sources_program test-utf-character_is_digit.c test-utf-is_digit.c @@ -42,6 +44,8 @@ build_sources_program test-utf-character_is_valid.c test-utf-is_valid.c build_sources_program test-utf-character_is_whitespace.c test-utf-is_whitespace.c build_sources_program test-utf-character_is_wide.c test-utf-is_wide.c build_sources_program test-utf-character_is_word.c test-utf-is_word.c +build_sources_program test-utf-character_is_word_dash.c test-utf-is_word_dash.c +build_sources_program test-utf-character_is_word_dash_plus.c test-utf-is_word_dash_plus.c build_sources_program test-utf-character_is_zero_width.c test-utf-is_zero_width.c build_sources_program test-utf-dynamic_adjust.c test-utf-dynamic_append.c test-utf-dynamic_append_assure.c test-utf-dynamic_append_assure_nulless.c 
test-utf-dynamic_append_nulless.c build_sources_program test-utf-dynamic_decimate_by.c test-utf-dynamic_decrease_by.c test-utf-dynamic_increase.c test-utf-dynamic_increase_by.c diff --git a/level_0/f_utf/data/tests/bytesequences/word-all.txt b/level_0/f_utf/data/tests/bytesequences/word-all.txt new file mode 100644 index 0000000..250189e --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word-all.txt @@ -0,0 +1,11 @@ +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 +15710387 +15710388 diff --git a/level_0/f_utf/data/tests/bytesequences/word-lax.txt b/level_0/f_utf/data/tests/bytesequences/word-lax.txt new file mode 100644 index 0000000..0ebf2a8 --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word-lax.txt @@ -0,0 +1,9 @@ +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 diff --git a/level_0/f_utf/data/tests/bytesequences/word_dash-all.txt b/level_0/f_utf/data/tests/bytesequences/word_dash-all.txt new file mode 100644 index 0000000..10e536e --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word_dash-all.txt @@ -0,0 +1,12 @@ +45 +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 +15710387 +15710388 diff --git a/level_0/f_utf/data/tests/bytesequences/word_dash-lax.txt b/level_0/f_utf/data/tests/bytesequences/word_dash-lax.txt new file mode 100644 index 0000000..4ea8349 --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word_dash-lax.txt @@ -0,0 +1,10 @@ +45 +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 diff --git a/level_0/f_utf/data/tests/bytesequences/word_dash_plus-all.txt b/level_0/f_utf/data/tests/bytesequences/word_dash_plus-all.txt new file mode 100644 index 0000000..9fc3025 --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word_dash_plus-all.txt @@ -0,0 +1,13 @@ +43 +45 +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 +15710387 +15710388 diff 
--git a/level_0/f_utf/data/tests/bytesequences/word_dash_plus-lax.txt b/level_0/f_utf/data/tests/bytesequences/word_dash_plus-lax.txt new file mode 100644 index 0000000..15b0c6c --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/word_dash_plus-lax.txt @@ -0,0 +1,11 @@ +43 +45 +95 +14844095 +14844094 +14844288 +14844308 +15710605 +15710606 +15710607 +15711423 diff --git a/level_0/f_utf/data/tests/bytesequences/zero_width-all.txt b/level_0/f_utf/data/tests/bytesequences/zero_width-all.txt new file mode 100644 index 0000000..6b5c7e0 --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/zero_width-all.txt @@ -0,0 +1,36 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +127 +14786702 +14844043 +14844044 +14844045 +14844320 +15711167 diff --git a/level_0/f_utf/data/tests/codepoints/word-all.txt b/level_0/f_utf/data/tests/codepoints/word-all.txt new file mode 100644 index 0000000..9586ace --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/word-all.txt @@ -0,0 +1,11 @@ +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F +U+FE33 +U+FE34 diff --git a/level_0/f_utf/data/tests/codepoints/word-lax.txt b/level_0/f_utf/data/tests/codepoints/word-lax.txt new file mode 100644 index 0000000..ef4ee40 --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/word-lax.txt @@ -0,0 +1,9 @@ +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F diff --git a/level_0/f_utf/data/tests/codepoints/word_dash-all.txt b/level_0/f_utf/data/tests/codepoints/word_dash-all.txt new file mode 100644 index 0000000..405ec77 --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/word_dash-all.txt @@ -0,0 +1,12 @@ +U+002D +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F +U+FE33 +U+FE34 diff --git a/level_0/f_utf/data/tests/codepoints/word_dash-lax.txt b/level_0/f_utf/data/tests/codepoints/word_dash-lax.txt new file mode 100644 index 0000000..f71b0e7 --- /dev/null +++ 
b/level_0/f_utf/data/tests/codepoints/word_dash-lax.txt @@ -0,0 +1,10 @@ +U+002D +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F diff --git a/level_0/f_utf/data/tests/codepoints/word_dash_plus-all.txt b/level_0/f_utf/data/tests/codepoints/word_dash_plus-all.txt new file mode 100644 index 0000000..d47698d --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/word_dash_plus-all.txt @@ -0,0 +1,13 @@ +U+002B +U+002D +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F +U+FE33 +U+FE34 diff --git a/level_0/f_utf/data/tests/codepoints/word_dash_plus-lax.txt b/level_0/f_utf/data/tests/codepoints/word_dash_plus-lax.txt new file mode 100644 index 0000000..d8d0538 --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/word_dash_plus-lax.txt @@ -0,0 +1,11 @@ +U+002B +U+002D +U+005F +U+203F +U+203E +U+2040 +U+2054 +U+FE4D +U+FE4E +U+FE4F +U+FF3F diff --git a/level_0/f_utf/data/tests/codepoints/zero_width-all.txt b/level_0/f_utf/data/tests/codepoints/zero_width-all.txt new file mode 100644 index 0000000..9293586 --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/zero_width-all.txt @@ -0,0 +1,36 @@ +U+0000 +U+0001 +U+0002 +U+0003 +U+0004 +U+0005 +U+0006 +U+0007 +U+0008 +U+000C +U+000D +U+000E +U+000F +U+0010 +U+0011 +U+0012 +U+0013 +U+0014 +U+0015 +U+0016 +U+0017 +U+0018 +U+0019 +U+001A +U+001B +U+001C +U+001D +U+001E +U+001F +U+007F +U+180E +U+200B +U+200C +U+200D +U+2060 +U+FEFF diff --git a/level_0/f_utf/tests/unit/c/data-utf.c b/level_0/f_utf/tests/unit/c/data-utf.c index bf775a6..ed036b0 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.c +++ b/level_0/f_utf/tests/unit/c/data-utf.c @@ -84,6 +84,31 @@ FILE *data__bytesequence_file_open__word(void) { return fopen("./data/tests/bytesequences/word-all.txt", "r"); } +FILE *data__bytesequence_file_open__word_dash(void) { + + return fopen("./data/tests/bytesequences/word_dash-all.txt", "r"); +} + +FILE *data__bytesequence_file_open__word_dash_plus(void) { + + return 
fopen("./data/tests/bytesequences/word_dash_plus-all.txt", "r"); +} + +FILE *data__bytesequence_file_open_lax__word(void) { + + return fopen("./data/tests/bytesequences/word-lax.txt", "r"); +} + +FILE *data__bytesequence_file_open_lax__word_dash(void) { + + return fopen("./data/tests/bytesequences/word_dash-lax.txt", "r"); +} + +FILE *data__bytesequence_file_open_lax__word_dash_plus(void) { + + return fopen("./data/tests/bytesequences/word_dash_plus-lax.txt", "r"); +} + FILE *data__bytesequence_file_open__zero_width(void) { return fopen("./data/tests/bytesequences/zero_width-all.txt", "r"); diff --git a/level_0/f_utf/tests/unit/c/data-utf.h b/level_0/f_utf/tests/unit/c/data-utf.h index e1bcfe1..6126e34 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.h +++ b/level_0/f_utf/tests/unit/c/data-utf.h @@ -263,6 +263,81 @@ extern FILE *data__bytesequence_file_open__wide(void); extern FILE *data__bytesequence_file_open__word(void); /** + * Open the "word_dash" bytesequence file. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/bytesequences/word_dash-all.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open__word_dash(void); + +/** + * Open the "word_dash_plus" bytesequence file. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/bytesequences/word_dash_plus-all.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open__word_dash_plus(void); + +/** + * Open the "word" bytesequence file, the "lax" variation. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). 
+ * - The file path is "data/tests/bytesequences/word-lax.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open_lax__word(void); + +/** + * Open the "word_dash" bytesequence file, the "lax" variation. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/bytesequences/word_dash-lax.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open_lax__word_dash(void); + +/** + * Open the "word_dash_plus" bytesequence file, the "lax" variation. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/bytesequences/word_dash_plus-lax.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open_lax__word_dash_plus(void); + +/** * Open the "zero_width" bytesequence file. 
* * This assumes the following: diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c index 9f234c5..9194cbe 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic.c @@ -5,33 +5,111 @@ extern "C" { #endif +static inline void private__test__f_utf_character_is_alphabetic__works(const f_status_t status, const f_utf_char_t sequence) { + + if (!f_utf_character_is_valid(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_zero_width(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace_modifier(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_numeric(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence)) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } +} + void test__f_utf_character_is_alphabetic__works(void **state) { - { - FILE *file = data__bytesequence_file_open__alphabetic(); + // To save time and effort, this test cheats by assuming the other functions are correct. 
+ // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; - assert_non_null(file); + uint8_t first = 0; + uint8_t second = 0; + uint8_t third = 0; + uint8_t fourth = 0; - f_utf_char_t sequence = 0; - ssize_t bytes = 0; + // 1-Byte (ASCII). + for (first = 0x00; first < 0x80; ++first) { - f_array_length_t line = 0; + sequence = macro_f_utf_char_t_from_char_1(first); - do { - bytes = data__bytesequence_get_line(file, &sequence); + private__test__f_utf_character_is_alphabetic__works(f_utf_character_is_alphabetic(sequence), sequence); + } // for - if (bytes > 0) { - const f_status_t status = f_utf_character_is_alphabetic(sequence); + // 2-Byte (ASCII). + for (first = 0xc2; first < 0xe0; ++first) { - assert_int_equal(status, F_true); - } + for (second = 0x80; second < 0xbf; ++second) { - ++line; + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second); - } while (bytes > 0); + private__test__f_utf_character_is_alphabetic__works(f_utf_character_is_alphabetic(sequence), sequence); + } // for + } // for - fclose(file); - } + // 3-Byte (ASCII). + for (first = 0xe0; first < 0xf0; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third); + + private__test__f_utf_character_is_alphabetic__works(f_utf_character_is_alphabetic(sequence), sequence); + } // for + } // for + } // for + + // 4-Byte (ASCII). 
+ for (first = 0xf0; first < 0xf8; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + for (fourth = 0x80; fourth < 0xbf; ++fourth) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third) | macro_f_utf_char_t_from_char_4(fourth); + + private__test__f_utf_character_is_alphabetic__works(f_utf_character_is_alphabetic(sequence), sequence); + } // for + } // for + } // for + } // for } #ifdef __cplusplus diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.c new file mode 100644 index 0000000..f58a6de --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.c @@ -0,0 +1,120 @@ +#include "test-utf.h" +#include "test-utf-character_is_alphabetic_digit.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void private__test__f_utf_character_is_alphabetic_digit__works(const f_status_t status, const f_utf_char_t sequence) { + + if (!f_utf_character_is_valid(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_digit(sequence, 0)) { + assert_int_equal(status, F_true); + } + else if (f_utf_character_is_zero_width(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace_modifier(sequence)) { + assert_int_equal(status, F_false); + } + 
else if (f_utf_character_is_numeric(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence)) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } +} + +void test__f_utf_character_is_alphabetic_digit__works(void **state) { + + // To save time and effort, this test cheats by assuming the other functions are correct. + // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; + + uint8_t first = 0; + uint8_t second = 0; + uint8_t third = 0; + uint8_t fourth = 0; + + // 1-Byte (ASCII). + for (first = 0x00; first < 0x80; ++first) { + + sequence = macro_f_utf_char_t_from_char_1(first); + + private__test__f_utf_character_is_alphabetic_digit__works(f_utf_character_is_alphabetic_digit(sequence, 0), sequence); + } // for + + // 2-Byte (ASCII). + for (first = 0xc2; first < 0xe0; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second); + + private__test__f_utf_character_is_alphabetic_digit__works(f_utf_character_is_alphabetic_digit(sequence, 0), sequence); + } // for + } // for + + // 3-Byte (ASCII). + for (first = 0xe0; first < 0xf0; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third); + + private__test__f_utf_character_is_alphabetic_digit__works(f_utf_character_is_alphabetic_digit(sequence, 0), sequence); + } // for + } // for + } // for + + // 4-Byte (ASCII). 
+ for (first = 0xf0; first < 0xf8; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + for (fourth = 0x80; fourth < 0xbf; ++fourth) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third) | macro_f_utf_char_t_from_char_4(fourth); + + private__test__f_utf_character_is_alphabetic_digit__works(f_utf_character_is_alphabetic_digit(sequence, 0), sequence); + } // for + } // for + } // for + } // for +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.h new file mode 100644 index 0000000..d415b25 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_digit.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_character_is_alphabetic_digit_h +#define _TEST__F_utf_character_is_alphabetic_digit_h + +/** + * Test that the function works. 
+ * + * @see f_utf_character_is_alphabetic_digit() + */ +extern void test__f_utf_character_is_alphabetic_digit__works(void **state); + +#endif // _TEST__F_utf_character_is_alphabetic_digit_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.c new file mode 100644 index 0000000..e13cb3a --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.c @@ -0,0 +1,117 @@ +#include "test-utf.h" +#include "test-utf-character_is_alphabetic_numeric.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void private__test__f_utf_character_is_alphabetic_numeric__works(const f_status_t status, const f_utf_char_t sequence) { + + if (!f_utf_character_is_valid(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_numeric(sequence)) { + assert_int_equal(status, F_true); + } + else if (f_utf_character_is_zero_width(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace_modifier(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence)) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence)) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } +} + +void 
test__f_utf_character_is_alphabetic_numeric__works(void **state) { + + // To save time and effort, this test cheats by assuming the other functions are correct. + // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; + + uint8_t first = 0; + uint8_t second = 0; + uint8_t third = 0; + uint8_t fourth = 0; + + // 1-Byte (ASCII). + for (first = 0x00; first < 0x80; ++first) { + + sequence = macro_f_utf_char_t_from_char_1(first); + + private__test__f_utf_character_is_alphabetic_numeric__works(f_utf_character_is_alphabetic_numeric(sequence), sequence); + } // for + + // 2-Byte (ASCII). + for (first = 0xc2; first < 0xe0; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second); + + private__test__f_utf_character_is_alphabetic_numeric__works(f_utf_character_is_alphabetic_numeric(sequence), sequence); + } // for + } // for + + // 3-Byte (ASCII). + for (first = 0xe0; first < 0xf0; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third); + + private__test__f_utf_character_is_alphabetic_numeric__works(f_utf_character_is_alphabetic_numeric(sequence), sequence); + } // for + } // for + } // for + + // 4-Byte (ASCII). 
+ for (first = 0xf0; first < 0xf8; ++first) { + + for (second = 0x80; second < 0xbf; ++second) { + + for (third = 0x80; third < 0xbf; ++third) { + + for (fourth = 0x80; fourth < 0xbf; ++fourth) { + + sequence = macro_f_utf_char_t_from_char_1(first) | macro_f_utf_char_t_from_char_2(second) | macro_f_utf_char_t_from_char_3(third) | macro_f_utf_char_t_from_char_4(fourth); + + private__test__f_utf_character_is_alphabetic_numeric__works(f_utf_character_is_alphabetic_numeric(sequence), sequence); + } // for + } // for + } // for + } // for +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.h new file mode 100644 index 0000000..4b872fe --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_alphabetic_numeric.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_character_is_alphabetic_numeric_h +#define _TEST__F_utf_character_is_alphabetic_numeric_h + +/** + * Test that the function works. 
+ * + * @see f_utf_character_is_alphabetic_numeric() + */ +extern void test__f_utf_character_is_alphabetic_numeric__works(void **state); + +#endif // _TEST__F_utf_character_is_alphabetic_numeric_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c index c1267c7..2733816 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_valid.c @@ -7,15 +7,23 @@ extern "C" { void test__f_utf_character_is_valid__works(void **state) { + uint8_t first = 0; + uint8_t second = 0; + uint8_t third = 0; + uint8_t fourth = 0; + uint8_t width = 0; + + f_status_t status = F_none; + for (f_utf_char_t sequence = 0; sequence < UINT32_MAX; ++sequence) { - const uint8_t first = macro_f_utf_char_t_to_char_1(sequence); - const uint8_t second = macro_f_utf_char_t_to_char_2(sequence); - const uint8_t third = macro_f_utf_char_t_to_char_3(sequence); - const uint8_t fourth = macro_f_utf_char_t_to_char_4(sequence); - const uint8_t width = macro_f_utf_char_t_width_is(sequence); + first = macro_f_utf_char_t_to_char_1(sequence); + second = macro_f_utf_char_t_to_char_2(sequence); + third = macro_f_utf_char_t_to_char_3(sequence); + fourth = macro_f_utf_char_t_to_char_4(sequence); + width = macro_f_utf_char_t_width_is(sequence); - const f_status_t status = f_utf_character_is_valid(sequence); + status = f_utf_character_is_valid(sequence); // All remaining bytes after width must be zero for valid sequence. 
if (width == 0 && (second || third || fourth)) { diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c index 459ad04..f622f8f 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word.c @@ -8,7 +8,7 @@ extern "C" { void test__f_utf_character_is_word__strict_is_false(void **state) { { - FILE *file = data__bytesequence_file_open__word(); + FILE *file = data__bytesequence_file_open_lax__word(); assert_non_null(file); @@ -23,7 +23,6 @@ void test__f_utf_character_is_word__strict_is_false(void **state) { if (bytes > 0) { const f_status_t status = f_utf_character_is_word(sequence, F_false); - // @todo provide an array of codes that should return false when not strict. assert_int_equal(status, F_true); } diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.c new file mode 100644 index 0000000..f2c2411 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.c @@ -0,0 +1,68 @@ +#include "test-utf.h" +#include "test-utf-character_is_word_dash.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_character_is_word_dash__strict_is_false(void **state) { + + { + FILE *file = data__bytesequence_file_open_lax__word_dash(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const f_status_t status = f_utf_character_is_word_dash(sequence, F_false); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +void test__f_utf_character_is_word_dash__strict_is_true(void **state) { + + { + FILE *file = data__bytesequence_file_open__word_dash(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + 
f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const f_status_t status = f_utf_character_is_word_dash(sequence, F_true); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.h new file mode 100644 index 0000000..98f24ec --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash.h @@ -0,0 +1,27 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_character_is_word_dash_h +#define _TEST__F_utf_character_is_word_dash_h + +/** + * Test that the function works when strict is F_false. + * + * @see f_utf_character_is_word_dash() + */ +extern void test__f_utf_character_is_word_dash__strict_is_false(void **state); + +/** + * Test that the function works when strict is F_true. 
+ * + * @see f_utf_character_is_word_dash() + */ +extern void test__f_utf_character_is_word_dash__strict_is_true(void **state); + +#endif // _TEST__F_utf_character_is_word_dash_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.c new file mode 100644 index 0000000..70f0832 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.c @@ -0,0 +1,68 @@ +#include "test-utf.h" +#include "test-utf-character_is_word_dash_plus.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_character_is_word_dash_plus__strict_is_false(void **state) { + + { + FILE *file = data__bytesequence_file_open_lax__word_dash_plus(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const f_status_t status = f_utf_character_is_word_dash_plus(sequence, F_false); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +void test__f_utf_character_is_word_dash_plus__strict_is_true(void **state) { + + { + FILE *file = data__bytesequence_file_open__word_dash_plus(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const f_status_t status = f_utf_character_is_word_dash_plus(sequence, F_true); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.h new file mode 100644 index 0000000..1e96c51 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_word_dash_plus.h @@ -0,0 +1,27 @@ 
+/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_character_is_word_dash_plus_h +#define _TEST__F_utf_character_is_word_dash_plus_h + +/** + * Test that the function works when strict is F_false. + * + * @see f_utf_character_is_word_dash_plus() + */ +extern void test__f_utf_character_is_word_dash_plus__strict_is_false(void **state); + +/** + * Test that the function works when strict is F_true. + * + * @see f_utf_character_is_word_dash_plus() + */ +extern void test__f_utf_character_is_word_dash_plus__strict_is_true(void **state); + +#endif // _TEST__F_utf_character_is_word_dash_plus_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c index dd00bfb..8ec43cb 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c @@ -5,50 +5,113 @@ extern "C" { #endif +static inline void private__test__f_utf_is_alphabetic__works(const f_status_t status, const f_utf_char_t sequence) { + + if (f_utf_character_is_valid(sequence) != F_true) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_zero_width(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace_modifier(sequence) == F_true) { + assert_int_equal(status, F_false); 
+ } + else if (f_utf_character_is_numeric(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } +} + void test__f_utf_is_alphabetic__works(void **state) { - { - FILE *file = data__bytesequence_file_open__alphabetic(); + // To save time and effort, this test cheats by assuming the other functions are correct. + // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; - assert_non_null(file); + uint8_t buffer[5] = { 0, 0, 0, 0, 0 }; - f_utf_char_t sequence = 0; - ssize_t bytes = 0; + // 1-Byte (ASCII). + for (buffer[0] = 0x00; buffer[0] < 0x80; ++buffer[0]) { - f_array_length_t line = 0; + sequence = macro_f_utf_char_t_from_char_1(buffer[0]); - do { - bytes = data__bytesequence_get_line(file, &sequence); + private__test__f_utf_is_alphabetic__works(f_utf_is_alphabetic((f_string_t) buffer, 5), sequence); + } // for - if (bytes > 0) { - const uint8_t width = macro_f_utf_char_t_width(sequence); - char buffer[5] = { 0, 0, 0, 0, 0 }; + // 2-Byte (UTF-8), continuation bytes span 0x80 through 0xbf inclusive. 
+ for (buffer[0] = 0xc2; buffer[0] < 0xe0; ++buffer[0]) { - buffer[0] = macro_f_utf_char_t_to_char_1(sequence); + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { - if (width > 1) { - buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]); - if (width > 2) { - buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + private__test__f_utf_is_alphabetic__works(f_utf_is_alphabetic((f_string_t) buffer, 5), sequence); + } // for + } // for - if (width > 3) { - buffer[3] = macro_f_utf_char_t_to_char_4(sequence); - } - } - } + buffer[1] = 0; - const f_status_t status = f_utf_is_alphabetic(buffer, 5); + // 3-Byte (UTF-8). + for (buffer[0] = 0xe0; buffer[0] < 0xf0; ++buffer[0]) { - assert_int_equal(status, F_true); - } + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { - ++line; + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { - } while (bytes > 0); + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]); - fclose(file); - } + private__test__f_utf_is_alphabetic__works(f_utf_is_alphabetic((f_string_t) buffer, 5), sequence); + } // for + } // for + } // for + + buffer[1] = 0; + buffer[2] = 0; + + // 4-Byte (UTF-8). 
+ for (buffer[0] = 0xf0; buffer[0] < 0xf8; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { + + for (buffer[3] = 0x80; buffer[3] <= 0xbf; ++buffer[3]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]) | macro_f_utf_char_t_from_char_4(buffer[3]); + + private__test__f_utf_is_alphabetic__works(f_utf_is_alphabetic((f_string_t) buffer, 5), sequence); + } // for + } // for + } // for + } // for } #ifdef __cplusplus diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.c new file mode 100644 index 0000000..586a254 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.c @@ -0,0 +1,122 @@ +#include "test-utf.h" +#include "test-utf-is_alphabetic_digit.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void private__test__f_utf_is_alphabetic_digit__works(const f_status_t status, const f_utf_char_t sequence) { + + if (f_utf_character_is_valid(sequence) != F_true) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_digit(sequence, 0) == F_true) { + assert_int_equal(status, F_true); + } + else if (f_utf_character_is_zero_width(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if 
(f_utf_character_is_whitespace_modifier(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_numeric(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, F_true); + } +} + +void test__f_utf_is_alphabetic_digit__works(void **state) { + + // To save time and effort, this test cheats by assuming the other functions are correct. + // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; + + uint8_t buffer[5] = { 0, 0, 0, 0, 0 }; + + // 1-Byte (ASCII). + for (buffer[0] = 0x00; buffer[0] < 0x80; ++buffer[0]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]); + + private__test__f_utf_is_alphabetic_digit__works(f_utf_is_alphabetic_digit((f_string_t) buffer, 5, 0), sequence); + } // for + + // 2-Byte (UTF-8), continuation bytes span 0x80 through 0xbf inclusive. + for (buffer[0] = 0xc2; buffer[0] < 0xe0; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]); + + private__test__f_utf_is_alphabetic_digit__works(f_utf_is_alphabetic_digit((f_string_t) buffer, 5, 0), sequence); + } // for + } // for + + buffer[1] = 0; + + // 3-Byte (UTF-8). 
+ for (buffer[0] = 0xe0; buffer[0] < 0xf0; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]); + + private__test__f_utf_is_alphabetic_digit__works(f_utf_is_alphabetic_digit((f_string_t) buffer, 5, 0), sequence); + } // for + } // for + } // for + + buffer[1] = 0; + buffer[2] = 0; + + // 4-Byte (UTF-8). + for (buffer[0] = 0xf0; buffer[0] < 0xf8; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { + + for (buffer[3] = 0x80; buffer[3] <= 0xbf; ++buffer[3]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]) | macro_f_utf_char_t_from_char_4(buffer[3]); + + private__test__f_utf_is_alphabetic_digit__works(f_utf_is_alphabetic_digit((f_string_t) buffer, 5, 0), sequence); + } // for + } // for + } // for + } // for +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.h b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.h new file mode 100644 index 0000000..39b6d71 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_digit.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_is_alphabetic_digit_h +#define _TEST__F_utf_is_alphabetic_digit_h + +/** + * Test that the function works. 
+ * + * @see f_utf_is_alphabetic_digit() + */ +extern void test__f_utf_is_alphabetic_digit__works(void **state); + +#endif // _TEST__F_utf_is_alphabetic_digit_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.c new file mode 100644 index 0000000..700070c --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.c @@ -0,0 +1,119 @@ +#include "test-utf.h" +#include "test-utf-is_alphabetic_numeric.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void private__test__f_utf_is_alphabetic_numeric__works(const f_status_t status, const f_utf_char_t sequence) { + + if (f_utf_character_is_valid(sequence) != F_true) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + assert_int_equal(status, F_status_set_error(F_utf_fragment)); + } + else { + assert_int_equal(status, F_false); + } + } + else if (f_utf_character_is_numeric(sequence) == F_true) { + assert_int_equal(status, F_true); + } + else if (f_utf_character_is_zero_width(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_control_picture(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_combining(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_whitespace_modifier(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_punctuation(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_symbol(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else if (f_utf_character_is_phonetic(sequence) == F_true) { + assert_int_equal(status, F_false); + } + else { + assert_int_equal(status, 
F_true); + } +} + +void test__f_utf_is_alphabetic_numeric__works(void **state) { + + // To save time and effort, this test cheats by assuming the other functions are correct. + // Use the other functions to determine what the correct return result should be. + f_utf_char_t sequence = f_utf_char_t_initialize; + + uint8_t buffer[5] = { 0, 0, 0, 0, 0 }; + + // 1-Byte (ASCII). + for (buffer[0] = 0x00; buffer[0] < 0x80; ++buffer[0]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]); + + private__test__f_utf_is_alphabetic_numeric__works(f_utf_is_alphabetic_numeric((f_string_t) buffer, 5), sequence); + } // for + + // 2-Byte (UTF-8), continuation bytes span 0x80 through 0xbf inclusive. + for (buffer[0] = 0xc2; buffer[0] < 0xe0; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]); + + private__test__f_utf_is_alphabetic_numeric__works(f_utf_is_alphabetic_numeric((f_string_t) buffer, 5), sequence); + } // for + } // for + + buffer[1] = 0; + + // 3-Byte (UTF-8). + for (buffer[0] = 0xe0; buffer[0] < 0xf0; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]); + + private__test__f_utf_is_alphabetic_numeric__works(f_utf_is_alphabetic_numeric((f_string_t) buffer, 5), sequence); + } // for + } // for + } // for + + buffer[1] = 0; + buffer[2] = 0; + + // 4-Byte (UTF-8). 
+ for (buffer[0] = 0xf0; buffer[0] < 0xf8; ++buffer[0]) { + + for (buffer[1] = 0x80; buffer[1] <= 0xbf; ++buffer[1]) { + + for (buffer[2] = 0x80; buffer[2] <= 0xbf; ++buffer[2]) { + + for (buffer[3] = 0x80; buffer[3] <= 0xbf; ++buffer[3]) { + + sequence = macro_f_utf_char_t_from_char_1(buffer[0]) | macro_f_utf_char_t_from_char_2(buffer[1]) | macro_f_utf_char_t_from_char_3(buffer[2]) | macro_f_utf_char_t_from_char_4(buffer[3]); + + private__test__f_utf_is_alphabetic_numeric__works(f_utf_is_alphabetic_numeric((f_string_t) buffer, 5), sequence); + } // for + } // for + } // for + } // for +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.h b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.h new file mode 100644 index 0000000..a5ca6ef --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic_numeric.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_is_alphabetic_numeric_h +#define _TEST__F_utf_is_alphabetic_numeric_h + +/** + * Test that the function works. 
+ * + * @see f_utf_is_alphabetic_numeric() + */ +extern void test__f_utf_is_alphabetic_numeric__works(void **state); + +#endif // _TEST__F_utf_is_alphabetic_numeric_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word.c b/level_0/f_utf/tests/unit/c/test-utf-is_word.c index 14bef6a..9c75ec7 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_word.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word.c @@ -8,7 +8,7 @@ extern "C" { void test__f_utf_is_word__strict_is_false(void **state) { { - FILE *file = data__bytesequence_file_open__word(); + FILE *file = data__bytesequence_file_open_lax__word(); assert_non_null(file); @@ -38,9 +38,8 @@ void test__f_utf_is_word__strict_is_false(void **state) { } } - const f_status_t status = f_utf_is_word(buffer, F_false, 5); + const f_status_t status = f_utf_is_word(buffer, 5, F_false); - // @todo provide an array of codes that should return false when not strict. assert_int_equal(status, F_true); } @@ -85,7 +84,7 @@ void test__f_utf_is_word__strict_is_true(void **state) { } } - const f_status_t status = f_utf_is_word(buffer, F_true, 5); + const f_status_t status = f_utf_is_word(buffer, 5, F_true); assert_int_equal(status, F_true); } diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.c b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.c new file mode 100644 index 0000000..4b8bb45 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.c @@ -0,0 +1,102 @@ +#include "test-utf.h" +#include "test-utf-is_word_dash.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_is_word_dash__strict_is_false(void **state) { + + { + FILE *file = data__bytesequence_file_open_lax__word_dash(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const uint8_t width = macro_f_utf_char_t_width(sequence); + char buffer[5] = { 0, 0, 0, 0, 0 }; + + buffer[0] = 
macro_f_utf_char_t_to_char_1(sequence); + + if (width > 1) { + buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + + if (width > 2) { + buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + + if (width > 3) { + buffer[3] = macro_f_utf_char_t_to_char_4(sequence); + } + } + } + + const f_status_t status = f_utf_is_word_dash(buffer, 5, F_false); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +void test__f_utf_is_word_dash__strict_is_true(void **state) { + + { + FILE *file = data__bytesequence_file_open__word_dash(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const uint8_t width = macro_f_utf_char_t_width(sequence); + char buffer[5] = { 0, 0, 0, 0, 0 }; + + buffer[0] = macro_f_utf_char_t_to_char_1(sequence); + + if (width > 1) { + buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + + if (width > 2) { + buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + + if (width > 3) { + buffer[3] = macro_f_utf_char_t_to_char_4(sequence); + } + } + } + + const f_status_t status = f_utf_is_word_dash(buffer, 5, F_true); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.h b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.h new file mode 100644 index 0000000..02311ae --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash.h @@ -0,0 +1,27 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_is_word_dash_h +#define _TEST__F_utf_is_word_dash_h + +/** + * Test that the function works when strict is F_false. 
+ * + * @see f_utf_is_word_dash() + */ +extern void test__f_utf_is_word_dash__strict_is_false(void **state); + +/** + * Test that the function works when strict is F_true. + * + * @see f_utf_is_word_dash() + */ +extern void test__f_utf_is_word_dash__strict_is_true(void **state); + +#endif // _TEST__F_utf_is_word_dash_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.c b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.c new file mode 100644 index 0000000..0f0ed19 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.c @@ -0,0 +1,102 @@ +#include "test-utf.h" +#include "test-utf-is_word_dash_plus.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_is_word_dash_plus__strict_is_false(void **state) { + + { + FILE *file = data__bytesequence_file_open_lax__word_dash_plus(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const uint8_t width = macro_f_utf_char_t_width(sequence); + char buffer[5] = { 0, 0, 0, 0, 0 }; + + buffer[0] = macro_f_utf_char_t_to_char_1(sequence); + + if (width > 1) { + buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + + if (width > 2) { + buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + + if (width > 3) { + buffer[3] = macro_f_utf_char_t_to_char_4(sequence); + } + } + } + + const f_status_t status = f_utf_is_word_dash_plus(buffer, 5, F_false); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +void test__f_utf_is_word_dash_plus__strict_is_true(void **state) { + + { + FILE *file = data__bytesequence_file_open__word_dash_plus(); + + assert_non_null(file); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + + if (bytes > 0) { + const uint8_t width = macro_f_utf_char_t_width(sequence); 
+ char buffer[5] = { 0, 0, 0, 0, 0 }; + + buffer[0] = macro_f_utf_char_t_to_char_1(sequence); + + if (width > 1) { + buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + + if (width > 2) { + buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + + if (width > 3) { + buffer[3] = macro_f_utf_char_t_to_char_4(sequence); + } + } + } + + const f_status_t status = f_utf_is_word_dash_plus(buffer, 5, F_true); + + assert_int_equal(status, F_true); + } + + ++line; + + } while (bytes > 0); + + fclose(file); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.h b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.h new file mode 100644 index 0000000..595130e --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word_dash_plus.h @@ -0,0 +1,27 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_is_word_dash_plus_h +#define _TEST__F_utf_is_word_dash_plus_h + +/** + * Test that the function works when strict is F_false. + * + * @see f_utf_is_word_dash_plus() + */ +extern void test__f_utf_is_word_dash_plus__strict_is_false(void **state); + +/** + * Test that the function works when strict is F_true. 
+ * + * @see f_utf_is_word_dash_plus() + */ +extern void test__f_utf_is_word_dash_plus__strict_is_true(void **state); + +#endif // _TEST__F_utf_is_word_dash_plus_h diff --git a/level_0/f_utf/tests/unit/c/test-utf.c b/level_0/f_utf/tests/unit/c/test-utf.c index 8a48129..81dbe34 100644 --- a/level_0/f_utf/tests/unit/c/test-utf.c +++ b/level_0/f_utf/tests/unit/c/test-utf.c @@ -24,9 +24,9 @@ int main(void) { cmocka_unit_test(test__f_utf_append_assure_nulless__works), cmocka_unit_test(test__f_utf_append_nulless__works), - //cmocka_unit_test(test__f_utf_character_is_alphabetic__works), - //cmocka_unit_test(test__f_utf_character_is_alphabetic_digit__works), - //cmocka_unit_test(test__f_utf_character_is_alphabetic_numeric__works), + cmocka_unit_test(test__f_utf_character_is_alphabetic__works), + cmocka_unit_test(test__f_utf_character_is_alphabetic_digit__works), + cmocka_unit_test(test__f_utf_character_is_alphabetic_numeric__works), cmocka_unit_test(test__f_utf_character_is_combining__works), cmocka_unit_test(test__f_utf_character_is_control__works), cmocka_unit_test(test__f_utf_character_is_digit__works), @@ -41,10 +41,16 @@ int main(void) { cmocka_unit_test(test__f_utf_character_is_valid__works), //cmocka_unit_test(test__f_utf_character_is_whitespace__works), cmocka_unit_test(test__f_utf_character_is_wide__works), - //cmocka_unit_test(test__f_utf_character_is_zero_width__works), + cmocka_unit_test(test__f_utf_character_is_zero_width__works), - //cmocka_unit_test(test__f_utf_character_is_word__strict_is_false), - //cmocka_unit_test(test__f_utf_character_is_word__strict_is_true), + cmocka_unit_test(test__f_utf_character_is_word__strict_is_false), + cmocka_unit_test(test__f_utf_character_is_word__strict_is_true), + + cmocka_unit_test(test__f_utf_character_is_word_dash__strict_is_false), + cmocka_unit_test(test__f_utf_character_is_word_dash__strict_is_true), + + cmocka_unit_test(test__f_utf_character_is_word_dash_plus__strict_is_false), + 
cmocka_unit_test(test__f_utf_character_is_word_dash_plus__strict_is_true), cmocka_unit_test(test__f_utf_dynamic_adjust__works), @@ -134,9 +140,9 @@ int main(void) { cmocka_unit_test(test__f_utf_dynamicss_increase_by__works), cmocka_unit_test(test__f_utf_dynamicss_resize__works), - //cmocka_unit_test(test__f_utf_is_alphabetic__works), - //cmocka_unit_test(test__f_utf_is_alphabetic_digit__works), - //cmocka_unit_test(test__f_utf_is_alphabetic_numeric__works), + cmocka_unit_test(test__f_utf_is_alphabetic__works), + cmocka_unit_test(test__f_utf_is_alphabetic_digit__works), + cmocka_unit_test(test__f_utf_is_alphabetic_numeric__works), cmocka_unit_test(test__f_utf_is_combining__works), cmocka_unit_test(test__f_utf_is_control__works), cmocka_unit_test(test__f_utf_is_digit__works), @@ -151,10 +157,16 @@ int main(void) { cmocka_unit_test(test__f_utf_is_valid__works), //cmocka_unit_test(test__f_utf_is_whitespace__works), cmocka_unit_test(test__f_utf_is_wide__works), - //cmocka_unit_test(test__f_utf_is_zero_width__works), + cmocka_unit_test(test__f_utf_is_zero_width__works), + + cmocka_unit_test(test__f_utf_is_word__strict_is_false), + cmocka_unit_test(test__f_utf_is_word__strict_is_true), + + cmocka_unit_test(test__f_utf_is_word_dash__strict_is_false), + cmocka_unit_test(test__f_utf_is_word_dash__strict_is_true), - //cmocka_unit_test(test__f_utf_is_word__strict_is_false), - //cmocka_unit_test(test__f_utf_is_word__strict_is_true), + cmocka_unit_test(test__f_utf_is_word_dash_plus__strict_is_false), + cmocka_unit_test(test__f_utf_is_word_dash_plus__strict_is_true), cmocka_unit_test(test__f_utf_map_multis_adjust__works), cmocka_unit_test(test__f_utf_map_multis_append__works), diff --git a/level_0/f_utf/tests/unit/c/test-utf.h b/level_0/f_utf/tests/unit/c/test-utf.h index 92ca332..b96c4a8 100644 --- a/level_0/f_utf/tests/unit/c/test-utf.h +++ b/level_0/f_utf/tests/unit/c/test-utf.h @@ -34,6 +34,8 @@ #include "test-utf-append_assure_nulless.h" #include "test-utf-append_nulless.h" 
#include "test-utf-character_is_alphabetic.h" +#include "test-utf-character_is_alphabetic_digit.h" +#include "test-utf-character_is_alphabetic_numeric.h" #include "test-utf-character_is_combining.h" #include "test-utf-character_is_control.h" #include "test-utf-character_is_digit.h" @@ -50,6 +52,8 @@ #include "test-utf-character_is_whitespace.h" #include "test-utf-character_is_wide.h" #include "test-utf-character_is_word.h" +#include "test-utf-character_is_word_dash.h" +#include "test-utf-character_is_word_dash_plus.h" #include "test-utf-character_is_zero_width.h" #include "test-utf-dynamic_adjust.h" #include "test-utf-dynamic_append.h" @@ -103,6 +107,8 @@ #include "test-utf-dynamicss_increase_by.h" #include "test-utf-dynamicss_resize.h" #include "test-utf-is_alphabetic.h" +#include "test-utf-is_alphabetic_digit.h" +#include "test-utf-is_alphabetic_numeric.h" #include "test-utf-is_combining.h" #include "test-utf-is_control.h" #include "test-utf-is_digit.h" @@ -119,6 +125,8 @@ #include "test-utf-is_whitespace.h" #include "test-utf-is_wide.h" #include "test-utf-is_word.h" +#include "test-utf-is_word_dash.h" +#include "test-utf-is_word_dash_plus.h" #include "test-utf-is_zero_width.h" #include "test-utf-map_multis_adjust.h" #include "test-utf-map_multis_append.h"