From: Kevin Day Date: Mon, 20 Jun 2022 04:42:18 +0000 (-0500) Subject: Update: The f_utf project regarding digits and perform other clean ups. X-Git-Tag: 0.5.10~30 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=e696e3941592c6910f2f8ecc87a698d4f618c3b4;p=fll Update: The f_utf project regarding digits and perform other clean ups. Redesign the digit and decimal behavior. The is digit functions now refer to base-10 but do not attempt to return the identified digit. The is decimal functions now refer to base-10 and support providing the identified digit. The is decimal functions also support other base units than just base-10. The alphabetic digit/numeric functions now also have an alphabetic decimal function. Clean up more places in the code using "sequence" rather than "character" or "characters". Functions like f_utf_character_is_alpha_digit() are now like f_utf_character_is_alphabetic_digit(). Add related unit tests. The is digit functions have unit tests that test if the digit returned is correct. I have not reviewed all of the "numeric" Unicode digits to confirm/deny that my is decimal functions are complete. I observed what looks like bugs in the alphabetic functions. In these cases the final return statement is returning F_false when they instead should be returning F_true. There are minor corrections in documentation. 
--- diff --git a/build/level_0/settings b/build/level_0/settings index 6984dec..e86fdf9 100644 --- a/build/level_0/settings +++ b/build/level_0/settings @@ -43,7 +43,7 @@ build_sources_library status_string.c build_sources_library string.c private-string.c string/common.c string/dynamic.c string/map.c string/map_multi.c string/private-dynamic.c string/private-map.c string/private-map_multi.c string/private-quantity.c string/private-range.c string/private-triple.c string/quantity.c string/range.c string/static.c string/triple.c build_sources_library type_array/array_length.c type_array/cell.c type_array/fll_id.c type_array/int8.c type_array/int16.c type_array/int32.c type_array/int64.c type_array/int128.c type_array/state.c type_array/status.c type_array/uint8.c type_array/uint16.c type_array/uint32.c type_array/uint64.c type_array/uint128.c build_sources_library type_array/private-array_length.c type_array/private-cell.c type_array/private-fll_id.c type_array/private-int8.c type_array/private-int16.c type_array/private-int32.c type_array/private-int64.c type_array/private-int128.c type_array/private-state.c type_array/private-status.c type_array/private-uint8.c type_array/private-uint16.c type_array/private-uint32.c type_array/private-uint64.c type_array/private-uint128.c -build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c +build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c 
private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-triple.c utf/private-is_unassigned.c utf/private-string.c utf/static.c utf/string.c utf/triple.c build_sources_library-level thread.c private-thread.c build_sources_library_shared diff --git a/build/monolithic/settings b/build/monolithic/settings index c652545..9a3e176 100644 --- a/build/monolithic/settings +++ b/build/monolithic/settings @@ -43,7 +43,7 @@ build_sources_library level_0/status_string.c build_sources_library level_0/string.c level_0/private-string.c level_0/string/common.c level_0/string/dynamic.c level_0/string/map.c level_0/string/map_multi.c level_0/string/private-dynamic.c level_0/string/private-map.c level_0/string/private-map_multi.c level_0/string/private-quantity.c level_0/string/private-range.c level_0/string/private-triple.c level_0/string/quantity.c level_0/string/range.c level_0/string/static.c level_0/string/triple.c build_sources_library level_0/type_array/array_length.c level_0/type_array/cell.c level_0/type_array/fll_id.c level_0/type_array/int8.c level_0/type_array/int16.c level_0/type_array/int32.c level_0/type_array/int64.c level_0/type_array/int128.c level_0/type_array/state.c level_0/type_array/status.c level_0/type_array/uint8.c level_0/type_array/uint16.c level_0/type_array/uint32.c level_0/type_array/uint64.c level_0/type_array/uint128.c build_sources_library level_0/type_array/private-array_length.c level_0/type_array/private-cell.c level_0/type_array/private-fll_id.c level_0/type_array/private-int8.c level_0/type_array/private-int16.c level_0/type_array/private-int32.c level_0/type_array/private-int64.c level_0/type_array/private-int128.c level_0/type_array/private-state.c 
level_0/type_array/private-status.c level_0/type_array/private-uint8.c level_0/type_array/private-uint16.c level_0/type_array/private-uint32.c level_0/type_array/private-uint64.c level_0/type_array/private-uint128.c -build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c +build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_decimal.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c build_sources_library level_0/utf/common.c level_0/utf/convert.c level_0/utf/dynamic.c level_0/utf/is.c level_0/utf/is_character.c level_0/utf/map.c level_0/utf/map_multi.c level_0/utf/static.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-dynamic.c level_0/utf/private-is_unassigned.c level_0/utf/private-map.c level_0/utf/private-map_multi.c level_0/utf/private-triple.c level_0/utf/private-string.c build_sources_library level_1/control_group.c diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c index 3245789..4d95c03 100644 --- 
a/level_0/f_utf/c/private-utf.c +++ b/level_0/f_utf/c/private-utf.c @@ -6,7 +6,7 @@ extern "C" { #endif -#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) +#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || 
!defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) f_status_t private_f_utf_char_to_character(const f_string_t sequence, const f_array_length_t width_max, f_utf_char_t *character_utf) { if (!macro_f_utf_byte_width_is(*sequence)) { @@ -45,7 +45,7 @@ extern "C" { return F_none; } -#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) +#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || 
!defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) #if !defined(_di_f_utf_unicode_to_) || !defined(_di_f_utf_character_unicode_to_) f_status_t private_f_utf_character_unicode_to(const f_utf_char_t sequence, uint32_t *codepoint) { diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h index 60ef262..4839420 100644 --- a/level_0/f_utf/c/private-utf.h +++ b/level_0/f_utf/c/private-utf.h @@ -44,12 +44,13 @@ extern "C" { * @see f_utf_character_is_valid() * @see f_utf_is_valid() * @see f_utf_is_alphabetic() - * @see f_utf_is_alphabetic_digit() + * @see f_utf_is_alphabetic_decimal() * @see f_utf_is_alphabetic_numeric() * @see f_utf_is_ascii() * @see f_utf_is_combining() * @see f_utf_is_control() * @see f_utf_is_control_picture() + * @see f_utf_is_decimal() * @see f_utf_is_digit() * @see f_utf_is_emoji() * @see f_utf_is_graph() @@ -70,9 +71,9 @@ extern "C" { * @see f_utf_is_zero_width() * @see f_utf_unicode_to() */ -#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || 
!defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) +#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) extern f_status_t private_f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_char_t *character_utf) F_attribute_visibility_internal_d; -#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || 
!defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) +#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to) /** * Private implementation of f_utf_character_is_zero_width(). 
diff --git a/level_0/f_utf/c/private-utf_alphabetic.c b/level_0/f_utf/c/private-utf_alphabetic.c index a35c162..83d6dfd 100644 --- a/level_0/f_utf/c/private-utf_alphabetic.c +++ b/level_0/f_utf/c/private-utf_alphabetic.c @@ -3,6 +3,7 @@ #include "private-utf_alphabetic.h" #include "private-utf_combining.h" #include "private-utf_control.h" +#include "private-utf_decimal.h" #include "private-utf_digit.h" #include "private-utf_numeric.h" #include "private-utf_phonetic.h" @@ -63,6 +64,54 @@ extern "C" { } #endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_) +#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) + f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) { + + if (private_f_utf_character_is_decimal(sequence, F_true, value)) { + return F_true; + } + + if (private_f_utf_character_is_zero_width(sequence)) { + return F_false; + } + + // The is_control() handles both is_control_code() and is_control_format(). 
+ if (private_f_utf_character_is_control(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_control_picture(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_whitespace(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_whitespace_modifier(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_numeric(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_punctuation(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_symbol(sequence)) { + return F_false; + } + + if (private_f_utf_character_is_phonetic(sequence)) { + return F_false; + } + + return F_true; + } +#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) + #if !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_) f_status_t private_f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) { @@ -107,7 +156,7 @@ extern "C" { return F_false; } - return F_false; + return F_true; } #endif // !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_) @@ -151,7 +200,7 @@ extern "C" { return F_false; } - return F_false; + return F_true; } #endif // !defined(_di_f_utf_character_is_alphabetic_numeric_) || !defined(_di_f_utf_is_alphabetic_numeric_) diff --git a/level_0/f_utf/c/private-utf_alphabetic.h b/level_0/f_utf/c/private-utf_alphabetic.h index 283142a..4cb7085 100644 --- a/level_0/f_utf/c/private-utf_alphabetic.h +++ b/level_0/f_utf/c/private-utf_alphabetic.h @@ -42,6 +42,38 @@ extern "C" { #endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_) /** + * Private implementation of f_utf_character_is_alphabetic_decimal(). + * + * Intended to be shared to each of the different implementation variations. + * + * This expects the character width to be of at least size 2. 
+ * + * @param sequence + * The byte sequence to validate as a character. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation. + * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 alphabetic or a digit character. + * F_false if not a UTF-8 alphabetic nor a digit character. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. + * + * @see f_utf_character_is_alphabetic_decimal() + * @see f_utf_is_alphabetic_decimal() + */ +#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) + extern f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) F_attribute_visibility_internal_d; +#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) + +/** * Private implementation of f_utf_character_is_alphabetic_digit(). * * Intended to be shared to each of the different implementation variations. 
diff --git a/level_0/f_utf/c/private-utf_decimal.c b/level_0/f_utf/c/private-utf_decimal.c new file mode 100644 index 0000000..5f7d1ae --- /dev/null +++ b/level_0/f_utf/c/private-utf_decimal.c @@ -0,0 +1,913 @@ +#include "utf.h" +#include "private-utf.h" +#include "private-utf_decimal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Inline helper function to reduce amount of code typed. + * + * Given the value, this will conditionally convert the range into an appropriate base-10 integer. + * + * This does not handle non-decimal values (non-base-10). + * + * @param sequence + * The character sequence to process. + * @param always + * The always return F_true boolean. + * @param start + * An inclusive start range. + * The stop range is determined from this. + * @param value + * The value to update, if non-NULL. + * + * @return + * F_true for valid digit in the requested range. + * F_false, otherwise. + */ +static inline f_status_t private_inline_f_utf_character_handle_decimal(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) { + + if (value) { + f_char_t ascii = 0; + + if (macro_f_utf_char_t_width(sequence) == 2) { + ascii = (f_char_t) macro_f_utf_char_t_to_char_2(sequence - start); + } + else if (macro_f_utf_char_t_width(sequence) == 3) { + ascii = (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start); + } + else if (macro_f_utf_char_t_width(sequence) == 4) { + ascii = (f_char_t) macro_f_utf_char_t_to_char_4(sequence - start); + } + + ascii += 0x30; + + return private_f_utf_character_is_decimal_for_ascii(ascii, always, value); + } + + return F_true; +} + +/** + * Inline helper function to reduce amount of code typed. + * + * Given the value, this will conditionally convert the range into an appropriate base-10 integer from 1 to 9. + * + * This does not handle non-decimal values (non-base-10). + * + * @param sequence + * The character sequence to process. 
+ * @param always + * The always return F_true boolean. + * @param start + * An inclusive start range. + * The stop range is determined from this. + * @param value + * The value to update, if non-NULL. + * + * @return + * F_true for valid digit in the requested range. + * F_false, otherwise. + */ +static inline f_status_t private_inline_f_utf_character_handle_roman_numeral(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) { + + if (value) { + const f_char_t ascii = ((f_char_t) macro_f_utf_char_t_to_char_3(sequence - start)) + 0x31; + + return private_f_utf_character_is_decimal_for_ascii(ascii, always, value); + } + + return F_true; +} + +#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) { + + if (macro_f_utf_char_t_width_is(sequence) == 2) { + + // Arabic: U+0660 to U+0669. + if (sequence >= 0xd9a00000 && sequence <= 0xd9a90000) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xd9a00000, value); + } + + // Extended Arabic: U+06F0 to U+06F9. + if (sequence >= 0xdbb00000 && sequence <= 0xdbb90000) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdbb00000, value); + } + + // NKo: U+07C0 to U+07C9. + if (sequence >= 0xdf800000 && sequence <= 0xdf890000) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdf800000, value); + } + + return F_false; + } + + if (macro_f_utf_char_t_width_is(sequence) == 3) { + + if (macro_f_utf_char_t_to_char_1(sequence) == 0xe0) { + + // Devanagari: U+0966 to U+096F. + if (sequence >= 0xe0a5a600 && sequence <= 0xe0a5af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a5a600, value); + } + + // Bengali: U+09E6 to U+09EF. 
+ if (sequence >= 0xe0a7a600 && sequence <= 0xe0a7af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a7a600, value); + } + + // Gurmukhi: U+0A66 to U+0A6F. + if (sequence >= 0xe0a9a600 && sequence <= 0xe0a9af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a9a600, value); + } + + // Gujarati: U+0AE6 to U+0AEF. + if (sequence >= 0xe0aba600 && sequence <= 0xe0abaf00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0aba600, value); + } + + // Oriya: U+0B66 to U+0B6F. + if (sequence >= 0xe0ada600 && sequence <= 0xe0adaf00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0ada600, value); + } + + // Tamil: U+0BE6 to U+0BEF. + if (sequence >= 0xe0afa600 && sequence <= 0xe0afaf00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0afa600, value); + } + + // Telugu: U+0C66 to U+0C6F. + if (sequence >= 0xe0b1a600 && sequence <= 0xe0b1af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b1a600, value); + } + + // Kannada: U+0CE6 to U+0CEF. + if (sequence >= 0xe0b3a600 && sequence <= 0xe0b3af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b3a600, value); + } + + // Malayalam: U+0D66 to U+0D6F. + if (sequence >= 0xe0b5a600 && sequence <= 0xe0b5af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b5a600, value); + } + + // Sinhala: U+0DE6 to U+0DEF. + if (sequence >= 0xe0b7a600 && sequence <= 0xe0b7af00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b7a600, value); + } + + // Thai: U+0E50 to U+0E59. + if (sequence >= 0xe0b99000 && sequence <= 0xe0b99900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b99000, value); + } + + // Lao: U+0ED0 to U+0ED9. 
+ if (sequence >= 0xe0bb9000 && sequence <= 0xe0bb9900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bb9000, value); + } + + // Tibetan: U+0F20 to U+0F29. + if (sequence >= 0xe0bca000 && sequence <= 0xe0bca900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bca000, value); + } + } + else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe1) { + + // Myanmar: U+1040 to U+1049. + if (sequence >= 0xe1818000 && sequence <= 0xe1818900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1818000, value); + } + + // Myanmar (Shan): U+1090 to U+1099. + if (sequence >= 0xe1829000 && sequence <= 0xe1829900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1829000, value); + } + + // Khmer: U+17E0 to U+17E9. + if (sequence >= 0xe19fa000 && sequence <= 0xe19fa900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe19fa000, value); + } + + // Mongolian: U+1810 to U+1819. + if (sequence >= 0xe1a09000 && sequence <= 0xe1a09900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a09000, value); + } + + // Limbu: U+1946 to U+194F. + if (sequence >= 0xe1a58600 && sequence <= 0xe1a58f00) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a58600, value); + } + + // New Tai Lue: U+19D0 to U+19D9. + if (sequence >= 0xe1a79000 && sequence <= 0xe1a79900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a79000, value); + } + + // Tai Tham (Hora): U+1A80 to U+1A89. + if (sequence >= 0xe1aa8000 && sequence <= 0xe1aa8900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa8000, value); + } + + // Tai Tham (Tham): U+1A90 to U+1A99. + if (sequence >= 0xe1aa9000 && sequence <= 0xe1aa9900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa9000, value); + } + + // Balinese: U+1B50 to U+1B59. 
+ if (sequence >= 0xe1ad9000 && sequence <= 0xe1ad9900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1ad9000, value); + } + + // Sundanese: U+1BB0 to U+1BB9. + if (sequence >= 0xe1aeb000 && sequence <= 0xe1aeb900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aeb000, value); + } + + // Lepcha: U+1C40 to U+1C49. + if (sequence >= 0xe1b18000 && sequence <= 0xe1b18900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b18000, value); + } + + // Ol Chiki: U+1C50 to U+1C59. + if (sequence >= 0xe1b19000 && sequence <= 0xe1b19900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b19000, value); + } + } + else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) { + + // Number Forms (Roman Numerals): U+2160 to U+2188. + if (sequence >= 0xe285a000 && sequence <= 0xe2868800) { + + // Roman Numerals (large) for 1-9: U+2160 to U+2168. + if (sequence >= 0xe285a000 && sequence <= 0xe285a800) { + return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285a000, value); + } + + // Roman Numerals (small) for 1-9: U+2170 to U+2178. + if (sequence >= 0xe285b000 && sequence <= 0xe285b800) { + return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285b000, value); + } + + // Roman Numeral (late form) for 6: U+2185. + if (!value || *value > 5) { + if (sequence == 0xe2868500) { + *value = 6; + + return F_true; + } + } + + if (value) { + if (*value > 9) { + + // Roman Numeral: U+2169, U+2179. + if (sequence == 0xe285a900 || sequence == 0xe285b900) { + *value = 10; + + return F_true; + } + + if (*value > 10) { + + // Roman Numeral: U+216A, U+217A. + if (sequence == 0xe285aa00 || sequence == 0xe285ba00) { + *value = 11; + + return F_true; + } + + if (*value > 11) { + + // Roman Numeral: U+216B, U+217B. 
+ if (sequence == 0xe285ab00 || sequence == 0xe285bb00) { + *value = 12; + + return F_true; + } + + // All remaining are out of the range 0-16 and value must be set to 0xffff for them to be processed. + if (*value == F_type_size_max_32_unsigned_d) { + + // Roman Numeral: U+216C, U+217C, U+2186. + if (sequence == 0xe285ac00 || sequence == 0xe285bc00 || sequence == 0xe2868600) { + *value = 50; + + return F_true; + } + + // Roman Numeral: U+216D, U+217D, U+2183 (reversed, large), U+2184 (reversed, small). + if (sequence == 0xe285ad00 || sequence == 0xe285bd00 || sequence == 0xe2868300 || sequence == 0xe2868400) { + *value = 100; + + return F_true; + } + + // Roman Numeral: U+216E, U+217E. + if (sequence == 0xe285ae00 || sequence == 0xe285be00) { + *value = 500; + + return F_true; + } + + // Roman Numeral: U+216F, U+217F, U+2180 (1000 "CD"). + if (sequence == 0xe285af00 || sequence == 0xe285bf00 || sequence == 0xe2868000) { + *value = 1000; + + return F_true; + } + + // Roman Numeral: U+2181. + if (sequence == 0xe2868100) { + *value = 5000; + + return F_true; + } + + // Roman Numeral: U+2182. + if (sequence == 0xe2868200) { + *value = 10000; + + return F_true; + } + + // Roman Numeral: U+2187. + if (sequence == 0xe2868700) { + *value = 50000; + + return F_true; + } + + // Roman Numeral: U+2188. + if (sequence == 0xe2868800) { + *value = 100000; + + return F_true; + } + } + } + } + } + + *value = F_type_size_max_32_unsigned_d; + } + + if (always) { + return F_true; + } + } + } + else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) { + + // Vai: U+A620 to U+A629. + if (sequence >= 0xea98a000 && sequence <= 0xea98a900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xea98a000, value); + } + + // Saurashtra: U+A8D0 to U+A8D9. + if (sequence >= 0xeaa39000 && sequence <= 0xeaa39900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa39000, value); + } + + // Kayah Li: U+A900 to U+A909. 
+ if (sequence >= 0xeaa48000 && sequence <= 0xeaa48900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa48000, value); + } + + // Javanese: U+A9D0 to U+A9D9. + if (sequence >= 0xeaa79000 && sequence <= 0xeaa79900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa79000, value); + } + + // Myanmar Extended-B: U+A9F0 to U+A9F9. + if (sequence >= 0xeaa7b000 && sequence <= 0xeaa7b900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa7b000, value); + } + + // Cham: U+AA50 to U+AA59. + if (sequence >= 0xeaa99000 && sequence <= 0xeaa99900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa99000, value); + } + + // Meetei Mayek: U+ABF0 to U+ABF9. + if (sequence >= 0xeaafb000 && sequence <= 0xeaafb900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaafb000, value); + } + } + else if (macro_f_utf_char_t_to_char_1(sequence) == 0xef) { + + // Halfwidth and Fullwidth Forms: U+FF10 to U+FF19. + if (sequence >= 0xefbc9000 && sequence <= 0xefbc9900) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xefbc9000, value); + } + } + + return F_false; + } + + if (macro_f_utf_char_t_to_char_1(sequence) == 0xf0) { + + if (macro_f_utf_char_t_to_char_2(sequence) == 0x90) { + + // Osmanya: U+104A0 to U+104A9. + if (sequence >= 0xf09092a0 && sequence <= 0xf09092a9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09092a0, value); + } + + // Hanifi Rohingya: U+10D30 to U+10D39. + if (sequence >= 0xf090b4b0 && sequence <= 0xf090b4b9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf090b4b0, value); + } + } + else if (macro_f_utf_char_t_to_char_2(sequence) == 0x91) { + + // Brahmi: U+11066 to U+1106F. 
+ if (sequence >= 0xf09181a6 && sequence <= 0xf09181af) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09181a6, value); + } + + // Sora Sompeng: U+110F0 to U+110F9. + if (sequence >= 0xf09183b0 && sequence <= 0xf09183b9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09183b0, value); + } + + // Chakma: U+11136 to U+1113F. + if (sequence >= 0xf09184b6 && sequence <= 0xf09184bf) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09184b6, value); + } + + // Sharada: U+111D0 to U+111D9. + if (sequence >= 0xf0918790 && sequence <= 0xf0918799) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918790, value); + } + + // Khudawadi: U+112F0 to U+112F9. + if (sequence >= 0xf0918bb0 && sequence <= 0xf0918bb9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918bb0, value); + } + + // Newa: U+11450 to U+11459. + if (sequence >= 0xf0919190 && sequence <= 0xf0919199) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919190, value); + } + + // Tirhuta: U+114D0 to U+114D9. + if (sequence >= 0xf0919390 && sequence <= 0xf0919399) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919390, value); + } + + // Modi: U+11650 to U+11659. + if (sequence >= 0xf0919990 && sequence <= 0xf0919999) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919990, value); + } + + // Takri: U+116C0 to U+116C9. + if (sequence >= 0xf0919b80 && sequence <= 0xf0919b89) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919b80, value); + } + + // Ahom: U+11730 to U+11739. + if (sequence >= 0xf0919cb0 && sequence <= 0xf0919cb9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919cb0, value); + } + + // Warang Citi: U+118E0 to U+118E9. 
+ if (sequence >= 0xf091a3a0 && sequence <= 0xf091a3a9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a3a0, value); + } + + // Dives Akuru: U+11950 to U+11959. + if (sequence >= 0xf091a590 && sequence <= 0xf091a599) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a590, value); + } + + // Bhaiksuki: U+11C50 to U+11C59. + if (sequence >= 0xf091b190 && sequence <= 0xf091b199) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b190, value); + } + + // Masaram Gondi: U+11D50 to U+11D59. + if (sequence >= 0xf091b590 && sequence <= 0xf091b599) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b590, value); + } + + // Gunjala Gondi: U+11DA0 to U+11DA9. + if (sequence >= 0xf091b6a0 && sequence <= 0xf091b6a9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b6a0, value); + } + } + else if (macro_f_utf_char_t_to_char_2(sequence) == 0x96) { + + // Mro: U+16A60 to U+16A69. + if (sequence >= 0xf096a9a0 && sequence <= 0xf096a9a9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096a9a0, value); + } + + // Tangsa: U+16AC0 to U+16AC9. + if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ab80, value); + } + + // Pahawh Hmong: U+16B50 to U+16B59. + if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ad90, value); + } + } + else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) { + + // Mathematical Alphanumeric (Bold) Symbols: U+1D7CE to U+1D7D7. + if (sequence >= 0xf09d9f8e && sequence <= 0xf09d9f97) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f8e, value); + } + + // Mathematical Alphanumeric (Double-Struck) Symbols: U+1D7D8 to U+1D7E1. 
+ if (sequence >= 0xf09d9f98 && sequence <= 0xf09d9fa1) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f98, value); + } + + // Mathematical Alphanumeric (Sans-Serif) Symbols: U+1D7E2 to U+1D7EB. + if (sequence >= 0xf09d9fa2 && sequence <= 0xf09d9fab) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fa2, value); + } + + // Mathematical Alphanumeric (Sans-Serif Bold) Symbols: U+1D7EC to U+1D7F5. + if (sequence >= 0xf09d9fac && sequence <= 0xf09d9fb5) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fac, value); + } + + // Mathematical Alphanumeric (Monospace) Symbols: U+1D7F6 to U+1D7FF. + if (sequence >= 0xf09d9fb6 && sequence <= 0xf09d9fbf) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fb6, value); + } + } + else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9e) { + + // Nyiakeng Puachue Hmong: U+1E140 to U+1E149. + if (sequence >= 0xf09e8580 && sequence <= 0xf09e8589) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8580, value); + } + + // Wancho: U+1E2F0 to U+1E2F9. + if (sequence >= 0xf09e8bb0 && sequence <= 0xf09e8bb9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8bb0, value); + } + + // Adlam: U+1E950 to U+1E959. + if (sequence >= 0xf09ea590 && sequence <= 0xf09ea599) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09ea590, value); + } + } + else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9f) { + + // Symbols for Legacy Computing (Segmented): U+1FBF0 to U+1FBF9. 
+ if (sequence >= 0xf09fafb0 && sequence <= 0xf09fafb9) { + return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09fafb0, value); + } + } + } + + if (value) { + *value = F_type_size_max_32_unsigned_d; + } + + return F_false; + } +#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + +#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) { + + if (always) { + if (isdigit(character)) { + if (value) { + if (character == f_string_ascii_0_s.string[0]) { + *value = 0; + + return F_true; + } + else if (character == f_string_ascii_1_s.string[0]) { + if (!*value || *value > 1) { + *value = 1; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_2_s.string[0]) { + if (!*value || *value > 2) { + *value = 2; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_3_s.string[0]) { + if (!*value || *value > 3) { + *value = 3; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_4_s.string[0]) { + if (!*value || *value > 4) { + *value = 4; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_5_s.string[0]) { + if (!*value || *value > 5) { + *value = 5; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_6_s.string[0]) { + if (!*value || *value > 6) { + *value = 6; + + return F_true; + } + } + else if (character == f_string_ascii_7_s.string[0]) { + if (!*value || *value > 7) { + *value = 7; + } + else { + *value = 
F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_8_s.string[0]) { + if (!*value || *value > 8) { + *value = 8; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + else if (character == f_string_ascii_9_s.string[0]) { + if (!*value || *value > 9) { + *value = 9; + } + else { + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + + if (*value > 10) { + if (character == f_string_ascii_a_s.string[0] || character == f_string_ascii_A_s.string[0]) { + *value = 10; + + return F_true; + } + + if (*value > 11) { + if (character == f_string_ascii_b_s.string[0] || character == f_string_ascii_B_s.string[0]) { + *value = 11; + + return F_true; + } + + if (*value > 12) { + if (character == f_string_ascii_c_s.string[0] || character == f_string_ascii_C_s.string[0]) { + *value = 12; + + return F_true; + } + + if (*value > 13) { + if (character == f_string_ascii_d_s.string[0] || character == f_string_ascii_D_s.string[0]) { + *value = 13; + + return F_true; + } + + if (*value > 14) { + if (character == f_string_ascii_e_s.string[0] || character == f_string_ascii_E_s.string[0]) { + *value = 14; + + return F_true; + } + + if (*value > 15) { + if (character == f_string_ascii_f_s.string[0] || character == f_string_ascii_F_s.string[0]) { + *value = 15; + + return F_true; + } + } + } + } + } + } + } + + *value = F_type_size_max_32_unsigned_d; + } + + return F_true; + } + + return F_false; + } + + if (isdigit(character)) { + if (value) { + if (character == f_string_ascii_0_s.string[0]) { + *value = 0; + + return F_true; + } + + if (character == f_string_ascii_1_s.string[0]) { + if (!*value || *value > 1) { + *value = 1; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_2_s.string[0]) { + if (!*value || *value > 2) { + *value = 2; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + 
if (character == f_string_ascii_3_s.string[0]) { + if (!*value || *value > 3) { + *value = 3; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_4_s.string[0]) { + if (!*value || *value > 4) { + *value = 4; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_5_s.string[0]) { + if (!*value || *value > 5) { + *value = 5; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_6_s.string[0]) { + if (!*value || *value > 6) { + *value = 6; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_7_s.string[0]) { + if (!*value || *value > 7) { + *value = 7; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_8_s.string[0]) { + if (!*value || *value > 8) { + *value = 8; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (character == f_string_ascii_9_s.string[0]) { + if (!*value || *value > 9) { + *value = 9; + + return F_true; + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + if (*value > 10) { + if (character == f_string_ascii_a_s.string[0] || character == f_string_ascii_A_s.string[0]) { + *value = 10; + + return F_true; + } + + if (*value > 11) { + if (character == f_string_ascii_b_s.string[0] || character == f_string_ascii_B_s.string[0]) { + *value = 11; + + return F_true; + } + + if (*value > 12) { + if (character == f_string_ascii_c_s.string[0] || character == f_string_ascii_C_s.string[0]) { + *value = 12; + + return F_true; + } + + if (*value > 13) { + if (character == f_string_ascii_d_s.string[0] || character == f_string_ascii_D_s.string[0]) { + *value = 13; + + return F_true; + } + + if (*value > 14) { + if (character == 
f_string_ascii_e_s.string[0] || character == f_string_ascii_E_s.string[0]) { + *value = 14; + + return F_true; + } + + if (*value > 15) { + if (character == f_string_ascii_f_s.string[0] || character == f_string_ascii_F_s.string[0]) { + *value = 15; + + return F_true; + } + } + } + } + } + } + } + + *value = F_type_size_max_32_unsigned_d; + + return F_false; + } + + return F_true; + } + + return F_false; + } +#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/c/private-utf_decimal.h b/level_0/f_utf/c/private-utf_decimal.h new file mode 100644 index 0000000..5e0f181 --- /dev/null +++ b/level_0/f_utf/c/private-utf_decimal.h @@ -0,0 +1,91 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Provides UTF-8 capabilities. + * + * These are provided for internal reduction in redundant code. + * These should not be exposed/used outside of this project. + */ +#ifndef _PRIVATE_F_utf_decimal_h +#define _PRIVATE_F_utf_decimal_h + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Private implementation of f_utf_character_is_decimal(). + * + * Intended to be shared to each of the different implementation variations. + * + * This expects the character width to be of at least size 2. + * + * @param sequence + * The byte sequence to validate as a character. + * @param always + * Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range. + * When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffff to represent no known representation. 
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 decimal character. + * F_false if not a UTF-8 decimal character. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. + * + * @see f_utf_character_is_decimal() + * @see f_utf_is_decimal() + */ +#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + extern f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) F_attribute_visibility_internal_d; +#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + +/** + * Helper function for handling ascii-only tests. + * + * The width is always assumed to be 1. + * + * @param character + * The ASCII character to validate. + * @param always + * Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range. + * When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation. + * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). 
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 decimal character. + * F_false if not a UTF-8 decimal character. + * + * @see isdigit() + * + * @see f_utf_character_is_decimal() + * @see f_utf_is_decimal() + */ +#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + extern f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) F_attribute_visibility_internal_d; +#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_) + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _PRIVATE_F_utf_decimal_h diff --git a/level_0/f_utf/c/private-utf_digit.c b/level_0/f_utf/c/private-utf_digit.c index 71b637a..3e18ece 100644 --- a/level_0/f_utf/c/private-utf_digit.c +++ b/level_0/f_utf/c/private-utf_digit.c @@ -6,7 +6,7 @@ extern "C" { #endif -#if !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_) +#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_) f_status_t private_f_utf_character_is_digit(const f_utf_char_t sequence) { if (macro_f_utf_char_t_width_is(sequence) == 2) { @@ -160,6 +160,13 @@ extern "C" { return F_true; } } + else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) { + + // Number Forms: U+2160 to U+2188 (Roman Numerals). + if (sequence >= 0xe285a000 && sequence <= 0xe2868800) { + return F_true; + } + } else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) { // Vai: U+A620 to U+A629. 
@@ -306,6 +313,11 @@ extern "C" { return F_true; } + // Tangsa: U+16AC0 to U+16AC9. + if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) { + return F_true; + } + // Pahawh Hmong: U+16B50 to U+16B59. if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) { return F_true; @@ -366,7 +378,7 @@ extern "C" { return F_false; } -#endif // !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_) +#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_) #ifdef __cplusplus } // extern "C" diff --git a/level_0/f_utf/c/private-utf_word.c b/level_0/f_utf/c/private-utf_word.c index 08008ad..0c77bd0 100644 --- a/level_0/f_utf/c/private-utf_word.c +++ b/level_0/f_utf/c/private-utf_word.c @@ -10,7 +10,7 @@ extern "C" { #if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_) f_status_t private_f_utf_character_is_word(const f_utf_char_t sequence, const bool strict) { - if (private_f_utf_character_is_alphabetic_digit(sequence)) { + if (private_f_utf_character_is_alphabetic_decimal(sequence, 0)) { return F_true; } diff --git a/level_0/f_utf/c/utf/common.h b/level_0/f_utf/c/utf/common.h index a7f868c..90f97da 100644 --- a/level_0/f_utf/c/utf/common.h +++ b/level_0/f_utf/c/utf/common.h @@ -146,32 +146,32 @@ extern "C" { #endif // _di_f_utf_substitute_ /** - * Provide a basic UTF-8 character as a single 4-byte variable. + * Provide a basic UTF-8 byte sequence as a single 4-byte variable. * - * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character. + * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte sequence. * - * This "character" type is stored as a big-endian 4-byte integer (32-bits). 
- * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that character arrays (uint8_t []) can be correctly processed. + * This byte sequence type is stored as a big-endian 4-byte integer (32-bits). + * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that byte sequence arrays (uint8_t []) can be correctly processed. * * The byte structure is intended to be read left to right in memory regardless of system endianness. - * This is done so that the first character (the left most) can be read naturally as a string, such as string[0] = first character. + * This is done so that the first byte (the left most) can be read naturally as a string, such as string[0] = first byte. * * On little-endian systems, the hex-string 0xff is represented as internally as 0x000000ff. * This needs to be converted into the internal representation of 0xff000000 to be properly represented as a "f_utf_char_t". * - * The macro_f_utf_char_t_mask_byte_* are used to get the entire character set fo a given width. + * The macro_f_utf_char_t_mask_byte_* are used to get the entire byte sequence for a given width. * - * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single character range. + * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single byte sequence range. * * The macro_f_utf_char_t_to_char_* are used to convert a f_utf_char_t into a uint8_t, for a given 8-bit block. * * The macro_f_utf_char_t_from_char_* are used to convert a uint8_t into part of a f_utf_char_t, for a given 8-bit block. * - * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width. - * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width_is. + * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width. 
+ * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width_is. * - * The macro_f_utf_char_t_width macro determines a width of the UTF-8 character based on macro_f_utf_byte_width. - * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when character is ASCII. + * The macro_f_utf_char_t_width macro determines a width of the UTF-8 byte sequence based on macro_f_utf_byte_width. + * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when byte sequence is ASCII. * * The macros that end in "_be" or "_le" represent "big endian" and "little endian". * The default macros without the "_be" should be in "big endian" because the strings are always stored as if they were "big endian" without regard to the host byte order. @@ -196,15 +196,15 @@ extern "C" { #define F_utf_char_mask_char_3_be_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 #define F_utf_char_mask_char_4_be_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - #define macro_f_utf_char_t_to_char_1_be(character) (((character) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte. - #define macro_f_utf_char_t_to_char_2_be(character) (((character) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte. - #define macro_f_utf_char_t_to_char_3_be(character) (((character) & F_utf_char_mask_char_3_be_d) >> 8) // Grab third byte. - #define macro_f_utf_char_t_to_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Grab fourth byte. + #define macro_f_utf_char_t_to_char_1_be(sequence) (((sequence) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte. + #define macro_f_utf_char_t_to_char_2_be(sequence) (((sequence) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte. + #define macro_f_utf_char_t_to_char_3_be(sequence) (((sequence) & F_utf_char_mask_char_3_be_d) >> 8) // Grab third byte. 
+ #define macro_f_utf_char_t_to_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d) // Grab fourth byte. - #define macro_f_utf_char_t_from_char_1_be(character) (((character) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte. - #define macro_f_utf_char_t_from_char_2_be(character) (((character) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte. - #define macro_f_utf_char_t_from_char_3_be(character) (((character) << 8) & F_utf_char_mask_char_3_be_d) // Shift to third byte. - #define macro_f_utf_char_t_from_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Shift to fourth byte. + #define macro_f_utf_char_t_from_char_1_be(sequence) (((sequence) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte. + #define macro_f_utf_char_t_from_char_2_be(sequence) (((sequence) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte. + #define macro_f_utf_char_t_from_char_3_be(sequence) (((sequence) << 8) & F_utf_char_mask_char_3_be_d) // Shift to third byte. + #define macro_f_utf_char_t_from_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d) // Shift to fourth byte. // Little Endian. #define F_utf_char_mask_byte_1_le_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 @@ -217,15 +217,15 @@ extern "C" { #define F_utf_char_mask_char_3_le_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 #define F_utf_char_mask_char_4_le_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 - #define macro_f_utf_char_t_to_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Grab first byte. - #define macro_f_utf_char_t_to_char_2_le(character) (((character) & F_utf_char_mask_char_2_le_d) >> 8) // Grab second byte. - #define macro_f_utf_char_t_to_char_3_le(character) (((character) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte. - #define macro_f_utf_char_t_to_char_4_le(character) (((character) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte. 
+ #define macro_f_utf_char_t_to_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d) // Grab first byte. + #define macro_f_utf_char_t_to_char_2_le(sequence) (((sequence) & F_utf_char_mask_char_2_le_d) >> 8) // Grab second byte. + #define macro_f_utf_char_t_to_char_3_le(sequence) (((sequence) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte. + #define macro_f_utf_char_t_to_char_4_le(sequence) (((sequence) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte. - #define macro_f_utf_char_t_from_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Shift to first byte. - #define macro_f_utf_char_t_from_char_2_le(character) (((character) << 8) & F_utf_char_mask_char_2_le_d) // Shift to second byte. - #define macro_f_utf_char_t_from_char_3_le(character) (((character) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte. - #define macro_f_utf_char_t_from_char_4_le(character) (((character) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte. + #define macro_f_utf_char_t_from_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d) // Shift to first byte. + #define macro_f_utf_char_t_from_char_2_le(sequence) (((sequence) << 8) & F_utf_char_mask_char_2_le_d) // Shift to second byte. + #define macro_f_utf_char_t_from_char_3_le(sequence) (((sequence) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte. + #define macro_f_utf_char_t_from_char_4_le(sequence) (((sequence) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte. 
#define F_utf_char_mask_byte_1_d F_utf_char_mask_byte_1_be_d #define F_utf_char_mask_byte_2_d F_utf_char_mask_byte_2_be_d @@ -237,18 +237,18 @@ extern "C" { #define F_utf_char_mask_char_3_d F_utf_char_mask_char_3_be_d #define F_utf_char_mask_char_4_d F_utf_char_mask_char_4_be_d - #define macro_f_utf_char_t_to_char_1(character) macro_f_utf_char_t_to_char_1_be(character) - #define macro_f_utf_char_t_to_char_2(character) macro_f_utf_char_t_to_char_2_be(character) - #define macro_f_utf_char_t_to_char_3(character) macro_f_utf_char_t_to_char_3_be(character) - #define macro_f_utf_char_t_to_char_4(character) macro_f_utf_char_t_to_char_4_be(character) + #define macro_f_utf_char_t_to_char_1(sequence) macro_f_utf_char_t_to_char_1_be(sequence) + #define macro_f_utf_char_t_to_char_2(sequence) macro_f_utf_char_t_to_char_2_be(sequence) + #define macro_f_utf_char_t_to_char_3(sequence) macro_f_utf_char_t_to_char_3_be(sequence) + #define macro_f_utf_char_t_to_char_4(sequence) macro_f_utf_char_t_to_char_4_be(sequence) - #define macro_f_utf_char_t_from_char_1(character) macro_f_utf_char_t_from_char_1_be(character) - #define macro_f_utf_char_t_from_char_2(character) macro_f_utf_char_t_from_char_2_be(character) - #define macro_f_utf_char_t_from_char_3(character) macro_f_utf_char_t_from_char_3_be(character) - #define macro_f_utf_char_t_from_char_4(character) macro_f_utf_char_t_from_char_4_be(character) + #define macro_f_utf_char_t_from_char_1(sequence) macro_f_utf_char_t_from_char_1_be(sequence) + #define macro_f_utf_char_t_from_char_2(sequence) macro_f_utf_char_t_from_char_2_be(sequence) + #define macro_f_utf_char_t_from_char_3(sequence) macro_f_utf_char_t_from_char_3_be(sequence) + #define macro_f_utf_char_t_from_char_4(sequence) macro_f_utf_char_t_from_char_4_be(sequence) - #define macro_f_utf_char_t_width(character) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(character))) - #define macro_f_utf_char_t_width_is(character) 
(macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(character))) + #define macro_f_utf_char_t_width(sequence) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(sequence))) + #define macro_f_utf_char_t_width_is(sequence) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(sequence))) #endif // _di_f_utf_char_t_ /** @@ -300,16 +300,16 @@ extern "C" { #endif // _di_f_utf_string_t_ /** - * Define unicode special character widths. + * Define unicode special byte sequence widths. * * F_utf_width_*: * - none: Designate this is not a width value or has no width (aka: NULL). * - ambiguous: Characters appear in East Asian DBCS and in SBCS. - * - full: Wide character that has a equivilent to a narrow character. - * - half: Narrow character that has a equivilent to a wide character. - * - narrow: Narrow character, without a wide equivalent. + * - full: Wide byte sequence that has a equivilent to a narrow byte sequence. + * - half: Narrow byte sequence that has a equivilent to a wide byte sequence. + * - narrow: Narrow byte sequence, without a wide equivalent. * - nuetral: Characters that do not appear in East Asian DBCS codes. - * - wide: Wide character, without a narrow equivalent. + * - wide: Wide byte sequence, without a narrow equivalent. 
*/ #ifndef _di_f_utf_widths_t_ enum { diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c index 635119d..f65749a 100644 --- a/level_0/f_utf/c/utf/is.c +++ b/level_0/f_utf/c/utf/is.c @@ -3,6 +3,7 @@ #include "../private-utf_alphabetic.h" #include "../private-utf_combining.h" #include "../private-utf_control.h" +#include "../private-utf_decimal.h" #include "../private-utf_digit.h" #include "../private-utf_emoji.h" #include "../private-utf_numeric.h" @@ -24,123 +25,121 @@ extern "C" { #endif #ifndef _di_f_utf_is_ - f_status_t f_utf_is(const f_string_t character) { + f_status_t f_utf_is(const f_string_t sequence) { - return macro_f_utf_byte_width_is(*character); + return macro_f_utf_byte_width_is(*sequence); } #endif // _di_f_utf_is_ #ifndef _di_f_utf_is_alphabetic_ - f_status_t f_utf_is_alphabetic(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_alphabetic(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_alphabetic(character_utf); + return private_f_utf_character_is_alphabetic(utf); } - if (isalpha(*character)) { - return F_true; - } + if 
(isalpha(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_alphabetic_ -#ifndef _di_f_utf_is_alphabetic_digit_ - f_status_t f_utf_is_alphabetic_digit(const f_string_t character, const f_array_length_t width_max) { +#ifndef _di_f_utf_is_alphabetic_decimal_ + f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_alphabetic_digit(character_utf); + return private_f_utf_character_is_alphabetic_decimal(utf, value); } - if (isalnum(*character)) { + if (isalpha(*sequence)) return F_true; + + if (private_f_utf_character_is_decimal_for_ascii(*sequence, F_true, value) == F_true) { return F_true; } return F_false; } -#endif // _di_f_utf_is_alphabetic_digit_ +#endif // _di_f_utf_is_alphabetic_decimal_ #ifndef _di_f_utf_is_alphabetic_numeric_ - f_status_t f_utf_is_alphabetic_numeric(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return 
F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_alphabetic_numeric(character_utf); + return private_f_utf_character_is_alphabetic_numeric(utf); } - if (isalnum(*character)) { - return F_true; - } + if (isalnum(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_alphabetic_numeric_ #ifndef _di_f_utf_is_ascii_ - f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_ascii(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } @@ -152,28 +151,28 @@ extern "C" { #endif // _di_f_utf_is_ascii_ #ifndef _di_f_utf_is_combining_ - f_status_t f_utf_is_combining(const f_string_t character, const 
f_array_length_t width_max) { + f_status_t f_utf_is_combining(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_combining(character_utf); + return private_f_utf_character_is_combining(utf); } // There are no ASCII combining characters. 
@@ -182,94 +181,90 @@ extern "C" { #endif // _di_f_utf_is_combining_ #ifndef _di_f_utf_is_control_ - f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_control(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_control(character_utf); + return private_f_utf_character_is_control(utf); } - if (iscntrl(*character)) { - return F_true; - } + if (iscntrl(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_control_ #ifndef _di_f_utf_is_control_code - f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_control_code(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return 
F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_control_code(character_utf); + return private_f_utf_character_is_control_code(utf); } - if (iscntrl(*character)) { - return F_true; - } + if (iscntrl(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_control_code_ #ifndef _di_f_utf_is_control_format_ - f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_control_format(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_control_format(character_utf); + return private_f_utf_character_is_control_format(utf); } // There are no ASCII control formats. 
@@ -278,32 +273,32 @@ extern "C" { #endif // _di_f_utf_is_control_format_ #ifndef _di_f_utf_is_control_picture_ - f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_control_picture(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - if (macro_f_utf_byte_width_is(*character) != 3) { + if (macro_f_utf_byte_width_is(*sequence) != 3) { return F_false; } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_control_picture(character_utf); + return private_f_utf_character_is_control_picture(utf); } // There are no ASCII control pictures. 
@@ -311,62 +306,89 @@ extern "C" { } #endif // _di_f_utf_is_control_picture_ -#ifndef _di_f_utf_is_digit_ - f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) { +#ifndef _di_f_utf_is_decimal_ + f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_digit(character_utf); + return private_f_utf_character_is_decimal(utf, F_true, value); } - if (isdigit(*character)) { - return F_true; + return private_f_utf_character_is_decimal_for_ascii(*sequence, F_false, value); + } +#endif // _di_f_utf_is_decimal_ + +#ifndef _di_f_utf_is_digit_ + f_status_t f_utf_is_digit(const f_string_t sequence, const f_array_length_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return F_status_set_error(F_parameter); + #endif // _di_level_0_parameter_checking_ + + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { + return F_status_set_error(F_complete_not_utf); + } + + if (macro_f_utf_byte_width_is(*sequence) == 1) { + return 
F_status_set_error(F_utf_fragment); + } + + f_utf_char_t utf = 0; + + { + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); + if (F_status_is_error(status)) return status; + } + + return private_f_utf_character_is_digit(utf); } + if (isdigit(*sequence)) return F_true; + return F_false; } #endif // _di_f_utf_is_digit_ #ifndef _di_f_utf_is_emoji_ - f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_emoji(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_emoji(character_utf); + return private_f_utf_character_is_emoji(utf); } return F_false; @@ -374,9 +396,9 @@ extern "C" { #endif // _di_f_utf_is_emoji_ #ifndef _di_f_utf_is_fragment_ - f_status_t f_utf_is_fragment(const f_string_t character) { + f_status_t f_utf_is_fragment(const f_string_t sequence) { - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_true; } @@ -385,107 +407,103 @@ extern "C" { #endif // _di_f_utf_is_fragment_ #ifndef _di_f_utf_is_graph_ - f_status_t f_utf_is_graph(const 
f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_graph(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - if (private_f_utf_character_is_control(character_utf)) { + if (private_f_utf_character_is_control(utf)) { return F_false; } - if (private_f_utf_character_is_whitespace(character_utf)) { + if (private_f_utf_character_is_whitespace(utf)) { return F_false; } // Zero-width characters are be treated as a non-graph. 
- if (private_f_utf_character_is_zero_width(character_utf)) { + if (private_f_utf_character_is_zero_width(utf)) { return F_false; } return F_true; } - if (isgraph(*character)) { - return F_true; - } + if (isgraph(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_graph_ #ifndef _di_f_utf_is_numeric_ - f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_numeric(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_numeric(character_utf); + return private_f_utf_character_is_numeric(utf); } - if (isdigit(*character)) { - return F_true; - } + if (isdigit(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_numeric_ #ifndef _di_f_utf_is_phonetic_ - f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_phonetic(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if 
(macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_phonetic(character_utf); + return private_f_utf_character_is_phonetic(utf); } // There are no ASCII phonetic characters. @@ -494,28 +512,28 @@ extern "C" { #endif // _di_f_utf_is_phonetic_ #ifndef _di_f_utf_is_private_ - f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_private(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return 
private_f_utf_character_is_private(character_utf); + return private_f_utf_character_is_private(utf); } // There are no ASCII private characters. @@ -524,57 +542,57 @@ extern "C" { #endif // _di_f_utf_is_private_ #ifndef _di_f_utf_is_punctuation_ - f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_punctuation(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_punctuation(character_utf); + return private_f_utf_character_is_punctuation(utf); } // ASCII: U+0021 '!' to U+0023 '#'. - if (character[0] > 0x20 && character[0] < 0x24) { + if (sequence[0] > 0x20 && sequence[0] < 0x24) { return F_true; } // ASCII: U+0025 '%' to U+002A '*'. - if (character[0] > 0x24 && character[0] < 0x2b) { + if (sequence[0] > 0x24 && sequence[0] < 0x2b) { return F_true; } // ASCII: U+002C ',' to U+002F '/'. - if (character[0] > 0x2b && character[0] < 0x30) { + if (sequence[0] > 0x2b && sequence[0] < 0x30) { return F_true; } // ASCII: U+003A ':', U+003B ';', U+003F '?', or U+0040 '@'. 
- if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) { + if (sequence[0] == 0x3a || sequence[0] == 0x3b || sequence[0] == 0x3f || sequence[0] == 0x40) { return F_true; } // ASCII: U+005B '[' to U+005D ']'. - if (character[0] > 0x5a && character[0] < 0x5e) { + if (sequence[0] > 0x5a && sequence[0] < 0x5e) { return F_true; } // ASCII: U+005F '_', U+007B '{', or U+007D '}'. - if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) { + if (sequence[0] == 0x5f || sequence[0] == 0x7b || sequence[0] == 0x7d) { return F_true; } @@ -583,28 +601,28 @@ extern "C" { #endif // _di_f_utf_is_punctuation_ #ifndef _di_f_utf_is_subscript_ - f_status_t f_utf_is_subscript(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_subscript(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_subscript(character_utf); + return private_f_utf_character_is_subscript(utf); } return F_false; @@ -612,28 +630,28 @@ extern "C" { #endif // _di_f_utf_is_subscript_ #ifndef _di_f_utf_is_superscript_ - f_status_t 
f_utf_is_superscript(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_superscript(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_superscript(character_utf); + return private_f_utf_character_is_superscript(utf); } return F_false; @@ -641,42 +659,42 @@ extern "C" { #endif // _di_f_utf_is_superscript_ #ifndef _di_f_utf_is_symbol_ - f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_symbol(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - 
f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_symbol(character_utf); + return private_f_utf_character_is_symbol(utf); } // ASCII: U+0024 ('$') or U+002B ('+'). - if (character[0] == 0x24 || character[0] == 0x2b) { + if (sequence[0] == 0x24 || sequence[0] == 0x2b) { return F_true; } // ASCII: U+003C ('<') to U+003E ('>'). - if (character[0] >= 0x3c && character[0] <= 0x3e) { + if (sequence[0] >= 0x3c && sequence[0] <= 0x3e) { return F_true; } // ASCII: U+005E ('^'), U+0060 ('`'), U+007C ('|'), or U+007E ('~'). - if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) { + if (sequence[0] == 0x5e || sequence[0] == 0x60 || sequence[0] == 0x7c || sequence[0] == 0x7e) { return F_true; } @@ -685,28 +703,28 @@ extern "C" { #endif // _di_f_utf_is_symbol_ #ifndef _di_f_utf_is_unassigned_ - f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_unassigned(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, 
&character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_unassigned(character_utf); + return private_f_utf_character_is_unassigned(utf); } // ASCII are never unassigned. @@ -715,28 +733,28 @@ extern "C" { #endif // _di_f_utf_is_unassigned_ #ifndef _di_f_utf_is_valid_ - f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_valid(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_valid(character_utf); + return private_f_utf_character_is_valid(utf); } // ASCII are valid. 
@@ -745,61 +763,59 @@ extern "C" { #endif // _di_f_utf_is_valid_ #ifndef _di_f_utf_is_whitespace_ - f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_whitespace(character_utf); + return private_f_utf_character_is_whitespace(utf); } - if (isspace(*character)) { - return F_true; - } + if (isspace(*sequence)) return F_true; return F_false; } #endif // _di_f_utf_is_whitespace_ #ifndef _di_f_utf_is_whitespace_modifier_ - f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_whitespace_modifier(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { 
return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_whitespace_modifier(character_utf); + return private_f_utf_character_is_whitespace_modifier(utf); } // There are no ASCII whitespace modifiers. @@ -808,28 +824,28 @@ extern "C" { #endif // _di_f_utf_is_whitespace_modifier_ #ifndef _di_f_utf_is_whitespace_other_ - f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_whitespace_other(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_whitespace_other(character_utf); + return private_f_utf_character_is_whitespace_other(utf); } // There are no ASCII 
whitespace other. @@ -838,25 +854,25 @@ extern "C" { #endif // _di_f_utf_is_whitespace_other_ #ifndef _di_f_utf_is_wide_ - f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_wide(const f_string_t sequence, const f_array_length_t width_max) { - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_wide(character_utf); + return private_f_utf_character_is_wide(utf); } // There are no wide ASCII characters. 
@@ -865,31 +881,31 @@ extern "C" { #endif // _di_f_utf_is_wide_ #ifndef _di_f_utf_is_word_ - f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) { + f_status_t f_utf_is_word(const f_string_t sequence, const f_array_length_t width_max, const bool strict) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_word(character_utf, strict); + return private_f_utf_character_is_word(utf, strict); } - if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0]) { + if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0]) { return F_true; } @@ -898,31 +914,31 @@ extern "C" { #endif // _di_f_utf_is_word_ #ifndef _di_f_utf_is_word_dash_ - f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) { + f_status_t f_utf_is_word_dash(const f_string_t sequence, const f_array_length_t width_max, const bool strict) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if 
(macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_word_dash(character_utf, strict); + return private_f_utf_character_is_word_dash(utf, strict); } - if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0]) { + if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0]) { return F_true; } @@ -931,31 +947,31 @@ extern "C" { #endif // _di_f_utf_is_word_dash_ #ifndef _di_f_utf_is_word_dash_plus_ - f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) { + f_status_t f_utf_is_word_dash_plus(const f_string_t sequence, const f_array_length_t width_max, const bool strict) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + 
f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_word_dash_plus(character_utf, strict); + return private_f_utf_character_is_word_dash_plus(utf, strict); } - if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0] || *character == f_string_ascii_plus_s.string[0]) { + if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0] || *sequence == f_string_ascii_plus_s.string[0]) { return F_true; } @@ -964,38 +980,38 @@ extern "C" { #endif // _di_f_utf_is_word_dash_plus_ #ifndef _di_f_utf_is_zero_width_ - f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) { + f_status_t f_utf_is_zero_width(const f_string_t sequence, const f_array_length_t width_max) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ - if (macro_f_utf_byte_width_is(*character)) { - if (macro_f_utf_byte_width_is(*character) > width_max) { + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > width_max) { return F_status_set_error(F_complete_not_utf); } - if (macro_f_utf_byte_width_is(*character) == 1) { + if (macro_f_utf_byte_width_is(*sequence) == 1) { return F_status_set_error(F_utf_fragment); } - f_utf_char_t character_utf = 0; + f_utf_char_t utf = 0; { - const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf); + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_zero_width(character_utf); + return 
private_f_utf_character_is_zero_width(utf); } // These control characters are considered zero-width spaces. - if (*character >= 0x00 && *character <= 0x08) { + if (*sequence >= 0x00 && *sequence <= 0x08) { return F_true; } - else if (*character >= 0x0c && *character <= 0x1f) { + else if (*sequence >= 0x0c && *sequence <= 0x1f) { return F_true; } - else if (*character == 0x7f) { + else if (*sequence == 0x7f) { return F_true; } diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h index c75f85b..ca6c9b9 100644 --- a/level_0/f_utf/c/utf/is.h +++ b/level_0/f_utf/c/utf/is.h @@ -60,16 +60,39 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character. * - * Digit characters are decimal digits and letter numbers. + * Decimal characters are decimal digits. * * This does not include number-like, such as 1/2 (½) or superscript 2 (²). * + * Decimal refers to a unit of base-10. + * To simplify the necessary code, this function automatically handles different base units if the number can be converted into a integer whose value is less than 2^16. + * If base-10 is desired, then simply ignore values greater than 9. + * For example, a base-16 character 'a' would result in the integer 10. + * Just ignore the value. + * This also processes large values such as roman numerals. + * Roman Numerals, however conflict with the natural hexidecimal numbers. + * To avoid this only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals. + * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000. + * + * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit. + * The 'F' is a character and a base-16 digit. 
+ * If this were to return F_false because it is greater than the requested base-12 then there would be confusion on whether or not 'F' is alphabetic. + * If the determined digit is greater than the requested base, then 0xffff is assigned to value. + * * @param sequence * The byte sequence to validate as a character. * There must be enough space allocated to compare against, as limited by width_max. * @param width_max * The maximum width available for checking. * Can be anything greater than 0. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffff to represent no known representation. + * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffff, then this will grab all known integer digits. + * Set to NULL to not use. * * @return * F_true if a UTF-8 alphabet character. @@ -81,12 +104,35 @@ extern "C" { * * @see isalnum() */ +#ifndef _di_f_utf_is_alphabetic_decimal_ + extern f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value); +#endif // _di_f_utf_is_alphabetic_decimal_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character. + * + * Digit characters are decimal digits. + * + * This does not include number-like, such as 1/2 (½) or superscript 2 (²). + * + * @param sequence + * The byte sequence to validate as a character. + * + * @return + * F_true if a UTF-8 alphabetic-digit character. + * F_false if not a UTF-8 alphabetic-digit character. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character. + * + * @see isalnum() + */ #ifndef _di_f_utf_is_alphabetic_digit_ extern f_status_t f_utf_is_alphabetic_digit(const f_string_t sequence, const f_array_length_t width_max); #endif // _di_f_utf_is_alphabetic_digit_ /** - * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character. + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character. * * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²). * @@ -105,7 +151,8 @@ extern "C" { * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * F_utf_not (with error bit) if Unicode is an invalid Unicode character. * - * @see isalnum() + * @see isalpha() + * @see isdigit() */ #ifndef _di_f_utf_is_alphabetic_numeric_ extern f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max); @@ -255,8 +302,64 @@ extern "C" { #endif // _di_f_utf_is_control_picture_ /** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character. + * + * Decimal characters are decimal digits. + * + * This does not include number-like, such as 1/2 (½) or superscript 2 (²). + * + * Decimal refers to a unit of base-10. + * To simplify the necessary code, this function automatically handles different base units if the number can be converted into a integer whose value is less than 2^16. + * If base-10 is desired, then simply ignore values greater than 9. + * For example, a base-16 character 'a' would result in the integer 10. + * Just ignore the value. + * This also processes large values such as roman numerals. + * Roman Numerals, however conflict with the natural hexidecimal numbers. + * To avoid this only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals. 
+ * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000. + * + * This function only returns F_true for valid decimal digits within the requested base. + * + * @param sequence + * The byte sequence to validate as a character. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * @param base + * (optional) The base digit to specify (up to base 16). + * Set to 0 to not use. + * This is ignored when value is NULL. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffff to represent no known representation. + * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffff, then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 decimal character. + * F_false if not a UTF-8 decimal character. + * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if Unicode is an invalid Unicode character. + * + * @see isdigit() + */ +#ifndef _di_f_utf_is_decimal_ + extern f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value); +#endif // _di_f_utf_is_decimal_ + +/** * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character. * + * Digit characters are decimal digits.
+ * + * This does not include number-like, such as 1/2 (½) or superscript 2 (²). + * * @param sequence * The byte sequence to validate as a character. * There must be enough space allocated to compare against, as limited by width_max. @@ -688,7 +791,7 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character. * - * A word character is alpha-digit or an underscore '_'. + * A word character is alphabetic-decimal or an underscore '_'. * * @param sequence * The byte sequence to validate as a character. @@ -718,7 +821,7 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character. * - * A word dash character is alpha-digit, an underscore '_' or a dash '-'. + * A word dash character is alphabetic-decimal, an underscore '_' or a dash '-'. * * Unicode appears to refer to dashes that connect words as a hyphen. * Therefore, only these hyphens are considered dashes for the purposes of this function. @@ -753,7 +856,7 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character. * - * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'. + * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'. * * Unicode appears to refer to dashes that connect words as a hyphen. * Therefore, only these hyphens are considered dashes for the purposes of this function. 
diff --git a/level_0/f_utf/c/utf/is_character.c b/level_0/f_utf/c/utf/is_character.c index 53f8192..381260b 100644 --- a/level_0/f_utf/c/utf/is_character.c +++ b/level_0/f_utf/c/utf/is_character.c @@ -3,6 +3,7 @@ #include "../private-utf_alphabetic.h" #include "../private-utf_combining.h" #include "../private-utf_control.h" +#include "../private-utf_decimal.h" #include "../private-utf_digit.h" #include "../private-utf_emoji.h" #include "../private-utf_numeric.h" @@ -57,8 +58,29 @@ extern "C" { } #endif // _di_f_utf_character_is_alphabetic_ +#ifndef _di_f_utf_character_is_alphabetic_decimal_ + f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) { + + if (macro_f_utf_char_t_width_is(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + return F_status_set_error(F_utf_fragment); + } + + return private_f_utf_character_is_alphabetic_decimal(sequence, value); + } + + if (isalpha(macro_f_utf_char_t_to_char_1(sequence))) return F_true; + + if (private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_true, value) == F_true) { + return F_true; + } + + return F_false; + } +#endif // _di_f_utf_character_is_alphabetic_decimal_ + #ifndef _di_f_utf_character_is_alphabetic_digit_ - f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence) { + f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) { if (macro_f_utf_char_t_width_is(sequence)) { if (macro_f_utf_char_t_width_is(sequence) == 1) { @@ -192,6 +214,21 @@ extern "C" { } #endif // _di_f_utf_character_is_control_picture_ +#ifndef _di_f_utf_character_is_decimal_ + f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value) { + + if (macro_f_utf_char_t_width_is(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + return F_status_set_error(F_utf_fragment); + } + + return private_f_utf_character_is_decimal(sequence, F_false, value); + } + + return 
private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_false, value); + } +#endif // _di_f_utf_character_is_decimal_ + #ifndef _di_f_utf_character_is_digit_ f_status_t f_utf_character_is_digit(const f_utf_char_t sequence) { diff --git a/level_0/f_utf/c/utf/is_character.h b/level_0/f_utf/c/utf/is_character.h index 6c331ac..9c85072 100644 --- a/level_0/f_utf/c/utf/is_character.h +++ b/level_0/f_utf/c/utf/is_character.h @@ -57,7 +57,54 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character. * - * Digit characters are decimal digits and letter numbers. + * Decimal characters are decimal digits. + * + * This does not include number-like, such as 1/2 (½) or superscript 2 (²). + * + * Decimal refers to a unit of base-10. + * To simplify the necessary code, this function automatically handles different base units if the number can be converted into a integer whose value is less than 2^16. + * If base-10 is desired, then simply ignore values greater than 9. + * For example, a base-16 character 'a' would result in the integer 10. + * Just ignore the value. + * This also processes large values such as roman numerals. + * Roman Numerals, however conflict with the natural hexidecimal numbers. + * To avoid this only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals. + * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000. + * + * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit. + * The 'F' is a character and a base-16 digit. + * If this were to return F_false because it is greater than the requested base-12 then there would be confusion on whether or not 'F' is alphabetic. + * If the determined digit is greater than the requested base, the 0xffff is assigned to value. 
+ * + * @param sequence + * The byte sequence to validate as a character. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffff to represent no known representation. + * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. + * For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffff, then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 alphabetic-decimal character. + * F_false if not a UTF-8 alphabetic-decimal character. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. + * + * @see isalpha() + * @see isdigit() + */ +#ifndef _di_f_utf_character_is_alphabetic_decimal_ + extern f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value); +#endif // _di_f_utf_character_is_alphabetic_decimal_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character. + * + * Digit characters are decimal digits. * * This does not include number-like, such as 1/2 (½) or superscript 2 (²). * @@ -65,8 +112,8 @@ extern "C" { * The byte sequence to validate as a character. * * @return - * F_true if a UTF-8 alpha-digit character. - * F_false if not a UTF-8 alpha-digit character. + * F_true if a UTF-8 alphabetic-digit character. + * F_false if not a UTF-8 alphabetic-digit character. * * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * F_utf_not (with error bit) if unicode is an invalid Unicode character.
@@ -74,7 +121,7 @@ extern "C" { * @see isalnum() */ #ifndef _di_f_utf_character_is_alphabetic_digit_ - extern f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence); + extern f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence); #endif // _di_f_utf_character_is_alphabetic_digit_ /** @@ -215,9 +262,52 @@ extern "C" { #endif // _di_f_utf_character_is_control_picture_ /** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character. + * + * Decimal characters are decimal digits. + * + * This does not include number-like, such as 1/2 (½) or superscript 2 (²). + * + * Decimal refers to a unit of base-10. + * To simplify the necessary code, this function automatically handles different base units if the number can be converted into a integer whose value is less than 2^16. + * If base-10 is desired, then simply ignore values greater than 9. + * For example, a base-16 character 'a' would result in the integer 10. + * Just ignore the value. + * This also processes large values such as roman numerals. + * Roman Numerals, however conflict with the natural hexidecimal numbers. + * To avoid this only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals. + * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000. + * + * This function only returns F_true for valid decimal digits within the requested base. + * + * @param sequence + * The byte sequence to validate as a character. + * @param value + * (optional) The integer representation of the character if the character is a decimal. + * If specified, value is set to 0xffff to represent no known representation. + * If specified and is initially a value of 0, then this represents the operating normall has decimal (base-10). + * If specified and is initially a value from 1 to 16, then this represents operating as that base unit. 
+ * For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal". + * If specified and is initially a value of 0xffff, then this will grab all known integer digits. + * Set to NULL to not use. + * + * @return + * F_true if a UTF-8 decimal character. + * F_false if not a UTF-8 decimal character. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. + * + * @see isdigit() + */ +#ifndef _di_f_utf_character_is_decimal_ + extern f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value); +#endif // _di_f_utf_character_is_decimal_ + +/** * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character. * - * Digit characters are decimal digits and letter numbers. + * Digit characters are decimal digits. * * This does not include number-like, such as 1/2 (½) or superscript 2 (²). * @@ -638,7 +728,7 @@ extern "C" { /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character. * - * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'. + * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'. * * Unicode appears to refer to dashes that connect words as a hyphen. * Therefore, only these hyphens are considered dashes for the purposes of this function.
diff --git a/level_0/f_utf/data/build/settings b/level_0/f_utf/data/build/settings index 716f9b4..3d5bcfd 100644 --- a/level_0/f_utf/data/build/settings +++ b/level_0/f_utf/data/build/settings @@ -20,7 +20,7 @@ build_language c build_libraries -lc build_libraries-individual -lf_memory -lf_string -build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c +build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/map_multi.c utf/static.c utf/string.c utf/triple.c build_sources_library utf/private-is_unassigned.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-string.c utf/private-triple.c diff --git a/level_0/f_utf/data/build/settings-tests b/level_0/f_utf/data/build/settings-tests index e1b5557..c755f5d 100644 --- a/level_0/f_utf/data/build/settings-tests +++ b/level_0/f_utf/data/build/settings-tests @@ -29,6 +29,7 @@ build_sources_program test-utf-append.c test-utf-append_assure.c test-utf-append build_sources_program test-utf-character_is_alphabetic.c test-utf-is_alphabetic.c build_sources_program test-utf-character_is_combining.c test-utf-is_combining.c build_sources_program 
test-utf-character_is_control.c test-utf-is_control.c +build_sources_program test-utf-character_is_decimal.c test-utf-is_decimal.c build_sources_program test-utf-character_is_digit.c test-utf-is_digit.c build_sources_program test-utf-character_is_emoji.c test-utf-is_emoji.c build_sources_program test-utf-character_is_numeric.c test-utf-is_numeric.c diff --git a/level_0/f_utf/data/tests/bytesequences/decimal-all.txt b/level_0/f_utf/data/tests/bytesequences/decimal-all.txt new file mode 100644 index 0000000..5c312bf --- /dev/null +++ b/level_0/f_utf/data/tests/bytesequences/decimal-all.txt @@ -0,0 +1,701 @@ +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +55712 +55713 +55714 +55715 +55716 +55717 +55718 +55719 +55720 +55721 +56240 +56241 +56242 +56243 +56244 +56245 +56246 +56247 +56248 +56249 +57216 +57217 +57218 +57219 +57220 +57221 +57222 +57223 +57224 +57225 +14722470 +14722471 +14722472 +14722473 +14722474 +14722475 +14722476 +14722477 +14722478 +14722479 +14722982 +14722983 +14722984 +14722985 +14722986 +14722987 +14722988 +14722989 +14722990 +14722991 +14723494 +14723495 +14723496 +14723497 +14723498 +14723499 +14723500 +14723501 +14723502 +14723503 +14724006 +14724007 +14724008 +14724009 +14724010 +14724011 +14724012 +14724013 +14724014 +14724015 +14724518 +14724519 +14724520 +14724521 +14724522 +14724523 +14724524 +14724525 +14724526 +14724527 +14725030 +14725031 +14725032 +14725033 +14725034 +14725035 +14725036 +14725037 +14725038 +14725039 +14725542 +14725543 +14725544 +14725545 +14725546 +14725547 +14725548 +14725549 +14725550 +14725551 +14726054 +14726055 +14726056 +14726057 +14726058 +14726059 +14726060 +14726061 +14726062 +14726063 +14726566 +14726567 +14726568 +14726569 +14726570 +14726571 +14726572 +14726573 +14726574 +14726575 +14727078 +14727079 +14727080 +14727081 +14727082 +14727083 +14727084 +14727085 +14727086 +14727087 +14727568 +14727569 +14727570 +14727571 +14727572 +14727573 +14727574 +14727575 +14727576 +14727577 +14728080 +14728081 +14728082 
+14728083 +14728084 +14728085 +14728086 +14728087 +14728088 +14728089 +14728352 +14728353 +14728354 +14728355 +14728356 +14728357 +14728358 +14728359 +14728360 +14728361 +14778752 +14778753 +14778754 +14778755 +14778756 +14778757 +14778758 +14778759 +14778760 +14778761 +14779024 +14779025 +14779026 +14779027 +14779028 +14779029 +14779030 +14779031 +14779032 +14779033 +14786464 +14786465 +14786466 +14786467 +14786468 +14786469 +14786470 +14786471 +14786472 +14786473 +14786704 +14786705 +14786706 +14786707 +14786708 +14786709 +14786710 +14786711 +14786712 +14786713 +14787974 +14787975 +14787976 +14787977 +14787978 +14787979 +14787980 +14787981 +14787982 +14787983 +14788496 +14788497 +14788498 +14788499 +14788500 +14788501 +14788502 +14788503 +14788504 +14788505 +14789248 +14789249 +14789250 +14789251 +14789252 +14789253 +14789254 +14789255 +14789256 +14789257 +14789264 +14789265 +14789266 +14789267 +14789268 +14789269 +14789270 +14789271 +14789272 +14789273 +14790032 +14790033 +14790034 +14790035 +14790036 +14790037 +14790038 +14790039 +14790040 +14790041 +14790320 +14790321 +14790322 +14790323 +14790324 +14790325 +14790326 +14790327 +14790328 +14790329 +14791040 +14791041 +14791042 +14791043 +14791044 +14791045 +14791046 +14791047 +14791048 +14791049 +14791056 +14791057 +14791058 +14791059 +14791060 +14791061 +14791062 +14791063 +14791064 +14791065 +14845344 +14845345 +14845346 +14845347 +14845348 +14845349 +14845350 +14845351 +14845352 +14845353 +14845354 +14845355 +14845356 +14845357 +14845358 +14845359 +14845360 +14845361 +14845362 +14845363 +14845364 +14845365 +14845366 +14845367 +14845368 +14845369 +14845370 +14845371 +14845372 +14845373 +14845374 +14845375 +14845568 +14845569 +14845570 +14845571 +14845572 +14845573 +14845574 +14845575 +14845576 +15374496 +15374497 +15374498 +15374499 +15374500 +15374501 +15374502 +15374503 +15374504 +15374505 +15377296 +15377297 +15377298 +15377299 +15377300 +15377301 +15377302 +15377303 +15377304 +15377305 +15377536 +15377537 
+15377538 +15377539 +15377540 +15377541 +15377542 +15377543 +15377544 +15377545 +15378320 +15378321 +15378322 +15378323 +15378324 +15378325 +15378326 +15378327 +15378328 +15378329 +15378352 +15378353 +15378354 +15378355 +15378356 +15378357 +15378358 +15378359 +15378360 +15378361 +15378832 +15378833 +15378834 +15378835 +15378836 +15378837 +15378838 +15378839 +15378840 +15378841 +15380400 +15380401 +15380402 +15380403 +15380404 +15380405 +15380406 +15380407 +15380408 +15380409 +15711376 +15711377 +15711378 +15711379 +15711380 +15711381 +15711382 +15711383 +15711384 +15711385 +4036006560 +4036006561 +4036006562 +4036006563 +4036006564 +4036006565 +4036006566 +4036006567 +4036006568 +4036006569 +4036015280 +4036015281 +4036015282 +4036015283 +4036015284 +4036015285 +4036015286 +4036015287 +4036015288 +4036015289 +4036067750 +4036067751 +4036067752 +4036067753 +4036067754 +4036067755 +4036067756 +4036067757 +4036067758 +4036067759 +4036068272 +4036068273 +4036068274 +4036068275 +4036068276 +4036068277 +4036068278 +4036068279 +4036068280 +4036068281 +4036068534 +4036068535 +4036068536 +4036068537 +4036068538 +4036068539 +4036068540 +4036068541 +4036068542 +4036068543 +4036069264 +4036069265 +4036069266 +4036069267 +4036069268 +4036069269 +4036069270 +4036069271 +4036069272 +4036069273 +4036070320 +4036070321 +4036070322 +4036070323 +4036070324 +4036070325 +4036070326 +4036070327 +4036070328 +4036070329 +4036071824 +4036071825 +4036071826 +4036071827 +4036071828 +4036071829 +4036071830 +4036071831 +4036071832 +4036071833 +4036072336 +4036072337 +4036072338 +4036072339 +4036072340 +4036072341 +4036072342 +4036072343 +4036072344 +4036072345 +4036073872 +4036073873 +4036073874 +4036073875 +4036073876 +4036073877 +4036073878 +4036073879 +4036073880 +4036073881 +4036074368 +4036074369 +4036074370 +4036074371 +4036074372 +4036074373 +4036074374 +4036074375 +4036074376 +4036074377 +4036074672 +4036074673 +4036074674 +4036074675 +4036074676 +4036074677 +4036074678 +4036074679 
+4036074680 +4036074681 +4036076448 +4036076449 +4036076450 +4036076451 +4036076452 +4036076453 +4036076454 +4036076455 +4036076456 +4036076457 +4036076944 +4036076945 +4036076946 +4036076947 +4036076948 +4036076949 +4036076950 +4036076951 +4036076952 +4036076953 +4036080016 +4036080017 +4036080018 +4036080019 +4036080020 +4036080021 +4036080022 +4036080023 +4036080024 +4036080025 +4036081040 +4036081041 +4036081042 +4036081043 +4036081044 +4036081045 +4036081046 +4036081047 +4036081048 +4036081049 +4036081312 +4036081313 +4036081314 +4036081315 +4036081316 +4036081317 +4036081318 +4036081319 +4036081320 +4036081321 +4036405664 +4036405665 +4036405666 +4036405667 +4036405668 +4036405669 +4036405670 +4036405671 +4036405672 +4036405673 +4036406144 +4036406145 +4036406146 +4036406147 +4036406148 +4036406149 +4036406150 +4036406151 +4036406152 +4036406153 +4036406672 +4036406673 +4036406674 +4036406675 +4036406676 +4036406677 +4036406678 +4036406679 +4036406680 +4036406681 +4036861838 +4036861839 +4036861840 +4036861841 +4036861842 +4036861843 +4036861844 +4036861845 +4036861846 +4036861847 +4036861848 +4036861849 +4036861850 +4036861851 +4036861852 +4036861853 +4036861854 +4036861855 +4036861856 +4036861857 +4036861858 +4036861859 +4036861860 +4036861861 +4036861862 +4036861863 +4036861864 +4036861865 +4036861866 +4036861867 +4036861868 +4036861869 +4036861870 +4036861871 +4036861872 +4036861873 +4036861874 +4036861875 +4036861876 +4036861877 +4036861878 +4036861879 +4036861880 +4036861881 +4036861882 +4036861883 +4036861884 +4036861885 +4036861886 +4036861887 +4036920704 +4036920705 +4036920706 +4036920707 +4036920708 +4036920709 +4036920710 +4036920711 +4036920712 +4036920713 +4036922288 +4036922289 +4036922290 +4036922291 +4036922292 +4036922293 +4036922294 +4036922295 +4036922296 +4036922297 +4036928912 +4036928913 +4036928914 +4036928915 +4036928916 +4036928917 +4036928918 +4036928919 +4036928920 +4036928921 +4036997040 +4036997041 +4036997042 +4036997043 
+4036997044 +4036997045 +4036997046 +4036997047 +4036997048 +4036997049 diff --git a/level_0/f_utf/data/tests/bytesequences/digit-all.txt b/level_0/f_utf/data/tests/bytesequences/digit-all.txt index 9bcbc2f..5c312bf 100644 --- a/level_0/f_utf/data/tests/bytesequences/digit-all.txt +++ b/level_0/f_utf/data/tests/bytesequences/digit-all.txt @@ -178,16 +178,6 @@ 14778759 14778760 14778761 -4036006560 -4036006561 -4036006562 -4036006563 -4036006564 -4036006565 -4036006566 -4036006567 -4036006568 -4036006569 14779024 14779025 14779026 @@ -198,6 +188,237 @@ 14779031 14779032 14779033 +14786464 +14786465 +14786466 +14786467 +14786468 +14786469 +14786470 +14786471 +14786472 +14786473 +14786704 +14786705 +14786706 +14786707 +14786708 +14786709 +14786710 +14786711 +14786712 +14786713 +14787974 +14787975 +14787976 +14787977 +14787978 +14787979 +14787980 +14787981 +14787982 +14787983 +14788496 +14788497 +14788498 +14788499 +14788500 +14788501 +14788502 +14788503 +14788504 +14788505 +14789248 +14789249 +14789250 +14789251 +14789252 +14789253 +14789254 +14789255 +14789256 +14789257 +14789264 +14789265 +14789266 +14789267 +14789268 +14789269 +14789270 +14789271 +14789272 +14789273 +14790032 +14790033 +14790034 +14790035 +14790036 +14790037 +14790038 +14790039 +14790040 +14790041 +14790320 +14790321 +14790322 +14790323 +14790324 +14790325 +14790326 +14790327 +14790328 +14790329 +14791040 +14791041 +14791042 +14791043 +14791044 +14791045 +14791046 +14791047 +14791048 +14791049 +14791056 +14791057 +14791058 +14791059 +14791060 +14791061 +14791062 +14791063 +14791064 +14791065 +14845344 +14845345 +14845346 +14845347 +14845348 +14845349 +14845350 +14845351 +14845352 +14845353 +14845354 +14845355 +14845356 +14845357 +14845358 +14845359 +14845360 +14845361 +14845362 +14845363 +14845364 +14845365 +14845366 +14845367 +14845368 +14845369 +14845370 +14845371 +14845372 +14845373 +14845374 +14845375 +14845568 +14845569 +14845570 +14845571 +14845572 +14845573 +14845574 +14845575 +14845576 
+15374496 +15374497 +15374498 +15374499 +15374500 +15374501 +15374502 +15374503 +15374504 +15374505 +15377296 +15377297 +15377298 +15377299 +15377300 +15377301 +15377302 +15377303 +15377304 +15377305 +15377536 +15377537 +15377538 +15377539 +15377540 +15377541 +15377542 +15377543 +15377544 +15377545 +15378320 +15378321 +15378322 +15378323 +15378324 +15378325 +15378326 +15378327 +15378328 +15378329 +15378352 +15378353 +15378354 +15378355 +15378356 +15378357 +15378358 +15378359 +15378360 +15378361 +15378832 +15378833 +15378834 +15378835 +15378836 +15378837 +15378838 +15378839 +15378840 +15378841 +15380400 +15380401 +15380402 +15380403 +15380404 +15380405 +15380406 +15380407 +15380408 +15380409 +15711376 +15711377 +15711378 +15711379 +15711380 +15711381 +15711382 +15711383 +15711384 +15711385 +4036006560 +4036006561 +4036006562 +4036006563 +4036006564 +4036006565 +4036006566 +4036006567 +4036006568 +4036006569 4036015280 4036015281 4036015282 @@ -368,6 +589,16 @@ 4036405671 4036405672 4036405673 +4036406144 +4036406145 +4036406146 +4036406147 +4036406148 +4036406149 +4036406150 +4036406151 +4036406152 +4036406153 4036406672 4036406673 4036406674 @@ -378,106 +609,6 @@ 4036406679 4036406680 4036406681 -14786464 -14786465 -14786466 -14786467 -14786468 -14786469 -14786470 -14786471 -14786472 -14786473 -14786704 -14786705 -14786706 -14786707 -14786708 -14786709 -14786710 -14786711 -14786712 -14786713 -14787974 -14787975 -14787976 -14787977 -14787978 -14787979 -14787980 -14787981 -14787982 -14787983 -14788496 -14788497 -14788498 -14788499 -14788500 -14788501 -14788502 -14788503 -14788504 -14788505 -14789248 -14789249 -14789250 -14789251 -14789252 -14789253 -14789254 -14789255 -14789256 -14789257 -14789264 -14789265 -14789266 -14789267 -14789268 -14789269 -14789270 -14789271 -14789272 -14789273 -14790032 -14790033 -14790034 -14790035 -14790036 -14790037 -14790038 -14790039 -14790040 -14790041 -14790320 -14790321 -14790322 -14790323 -14790324 -14790325 -14790326 -14790327 
-14790328 -14790329 -14791040 -14791041 -14791042 -14791043 -14791044 -14791045 -14791046 -14791047 -14791048 -14791049 -14791056 -14791057 -14791058 -14791059 -14791060 -14791061 -14791062 -14791063 -14791064 -14791065 4036861838 4036861839 4036861840 @@ -568,83 +699,3 @@ 4036997047 4036997048 4036997049 -15374496 -15374497 -15374498 -15374499 -15374500 -15374501 -15374502 -15374503 -15374504 -15374505 -15377296 -15377297 -15377298 -15377299 -15377300 -15377301 -15377302 -15377303 -15377304 -15377305 -15377536 -15377537 -15377538 -15377539 -15377540 -15377541 -15377542 -15377543 -15377544 -15377545 -15378320 -15378321 -15378322 -15378323 -15378324 -15378325 -15378326 -15378327 -15378328 -15378329 -15378352 -15378353 -15378354 -15378355 -15378356 -15378357 -15378358 -15378359 -15378360 -15378361 -15378832 -15378833 -15378834 -15378835 -15378836 -15378837 -15378838 -15378839 -15378840 -15378841 -15380400 -15380401 -15380402 -15380403 -15380404 -15380405 -15380406 -15380407 -15380408 -15380409 -15711376 -15711377 -15711378 -15711379 -15711380 -15711381 -15711382 -15711383 -15711384 -15711385 diff --git a/level_0/f_utf/data/tests/codepoints/decimal-all.txt b/level_0/f_utf/data/tests/codepoints/decimal-all.txt new file mode 100644 index 0000000..5923866 --- /dev/null +++ b/level_0/f_utf/data/tests/codepoints/decimal-all.txt @@ -0,0 +1,701 @@ +U+0030 +U+0031 +U+0032 +U+0033 +U+0034 +U+0035 +U+0036 +U+0037 +U+0038 +U+0039 +U+0660 +U+0661 +U+0662 +U+0663 +U+0664 +U+0665 +U+0666 +U+0667 +U+0668 +U+0669 +U+06F0 +U+06F1 +U+06F2 +U+06F3 +U+06F4 +U+06F5 +U+06F6 +U+06F7 +U+06F8 +U+06F9 +U+07C0 +U+07C1 +U+07C2 +U+07C3 +U+07C4 +U+07C5 +U+07C6 +U+07C7 +U+07C8 +U+07C9 +U+0966 +U+0967 +U+0968 +U+0969 +U+096A +U+096B +U+096C +U+096D +U+096E +U+096F +U+09E6 +U+09E7 +U+09E8 +U+09E9 +U+09EA +U+09EB +U+09EC +U+09ED +U+09EE +U+09EF +U+0A66 +U+0A67 +U+0A68 +U+0A69 +U+0A6A +U+0A6B +U+0A6C +U+0A6D +U+0A6E +U+0A6F +U+0AE6 +U+0AE7 +U+0AE8 +U+0AE9 +U+0AEA +U+0AEB +U+0AEC +U+0AED +U+0AEE +U+0AEF 
+U+0B66 +U+0B67 +U+0B68 +U+0B69 +U+0B6A +U+0B6B +U+0B6C +U+0B6D +U+0B6E +U+0B6F +U+0BE6 +U+0BE7 +U+0BE8 +U+0BE9 +U+0BEA +U+0BEB +U+0BEC +U+0BED +U+0BEE +U+0BEF +U+0C66 +U+0C67 +U+0C68 +U+0C69 +U+0C6A +U+0C6B +U+0C6C +U+0C6D +U+0C6E +U+0C6F +U+0CE6 +U+0CE7 +U+0CE8 +U+0CE9 +U+0CEA +U+0CEB +U+0CEC +U+0CED +U+0CEE +U+0CEF +U+0D66 +U+0D67 +U+0D68 +U+0D69 +U+0D6A +U+0D6B +U+0D6C +U+0D6D +U+0D6E +U+0D6F +U+0DE6 +U+0DE7 +U+0DE8 +U+0DE9 +U+0DEA +U+0DEB +U+0DEC +U+0DED +U+0DEE +U+0DEF +U+0E50 +U+0E51 +U+0E52 +U+0E53 +U+0E54 +U+0E55 +U+0E56 +U+0E57 +U+0E58 +U+0E59 +U+0ED0 +U+0ED1 +U+0ED2 +U+0ED3 +U+0ED4 +U+0ED5 +U+0ED6 +U+0ED7 +U+0ED8 +U+0ED9 +U+0F20 +U+0F21 +U+0F22 +U+0F23 +U+0F24 +U+0F25 +U+0F26 +U+0F27 +U+0F28 +U+0F29 +U+1040 +U+1041 +U+1042 +U+1043 +U+1044 +U+1045 +U+1046 +U+1047 +U+1048 +U+1049 +U+1090 +U+1091 +U+1092 +U+1093 +U+1094 +U+1095 +U+1096 +U+1097 +U+1098 +U+1099 +U+17E0 +U+17E1 +U+17E2 +U+17E3 +U+17E4 +U+17E5 +U+17E6 +U+17E7 +U+17E8 +U+17E9 +U+1810 +U+1811 +U+1812 +U+1813 +U+1814 +U+1815 +U+1816 +U+1817 +U+1818 +U+1819 +U+1946 +U+1947 +U+1948 +U+1949 +U+194A +U+194B +U+194C +U+194D +U+194E +U+194F +U+19D0 +U+19D1 +U+19D2 +U+19D3 +U+19D4 +U+19D5 +U+19D6 +U+19D7 +U+19D8 +U+19D9 +U+1A80 +U+1A81 +U+1A82 +U+1A83 +U+1A84 +U+1A85 +U+1A86 +U+1A87 +U+1A88 +U+1A89 +U+1A90 +U+1A91 +U+1A92 +U+1A93 +U+1A94 +U+1A95 +U+1A96 +U+1A97 +U+1A98 +U+1A99 +U+1B50 +U+1B51 +U+1B52 +U+1B53 +U+1B54 +U+1B55 +U+1B56 +U+1B57 +U+1B58 +U+1B59 +U+1BB0 +U+1BB1 +U+1BB2 +U+1BB3 +U+1BB4 +U+1BB5 +U+1BB6 +U+1BB7 +U+1BB8 +U+1BB9 +U+1C40 +U+1C41 +U+1C42 +U+1C43 +U+1C44 +U+1C45 +U+1C46 +U+1C47 +U+1C48 +U+1C49 +U+1C50 +U+1C51 +U+1C52 +U+1C53 +U+1C54 +U+1C55 +U+1C56 +U+1C57 +U+1C58 +U+1C59 +U+2160 +U+2161 +U+2162 +U+2163 +U+2164 +U+2165 +U+2166 +U+2167 +U+2168 +U+2169 +U+216A +U+216B +U+216C +U+216D +U+216E +U+216F +U+2170 +U+2171 +U+2172 +U+2173 +U+2174 +U+2175 +U+2176 +U+2177 +U+2178 +U+2179 +U+217A +U+217B +U+217C +U+217D +U+217E +U+217F +U+2180 +U+2181 +U+2182 +U+2183 +U+2184 +U+2185 +U+2186 +U+2187 
+U+2188 +U+A620 +U+A621 +U+A622 +U+A623 +U+A624 +U+A625 +U+A626 +U+A627 +U+A628 +U+A629 +U+A8D0 +U+A8D1 +U+A8D2 +U+A8D3 +U+A8D4 +U+A8D5 +U+A8D6 +U+A8D7 +U+A8D8 +U+A8D9 +U+A900 +U+A901 +U+A902 +U+A903 +U+A904 +U+A905 +U+A906 +U+A907 +U+A908 +U+A909 +U+A9D0 +U+A9D1 +U+A9D2 +U+A9D3 +U+A9D4 +U+A9D5 +U+A9D6 +U+A9D7 +U+A9D8 +U+A9D9 +U+A9F0 +U+A9F1 +U+A9F2 +U+A9F3 +U+A9F4 +U+A9F5 +U+A9F6 +U+A9F7 +U+A9F8 +U+A9F9 +U+AA50 +U+AA51 +U+AA52 +U+AA53 +U+AA54 +U+AA55 +U+AA56 +U+AA57 +U+AA58 +U+AA59 +U+ABF0 +U+ABF1 +U+ABF2 +U+ABF3 +U+ABF4 +U+ABF5 +U+ABF6 +U+ABF7 +U+ABF8 +U+ABF9 +U+FF10 +U+FF11 +U+FF12 +U+FF13 +U+FF14 +U+FF15 +U+FF16 +U+FF17 +U+FF18 +U+FF19 +U+104A0 +U+104A1 +U+104A2 +U+104A3 +U+104A4 +U+104A5 +U+104A6 +U+104A7 +U+104A8 +U+104A9 +U+10D30 +U+10D31 +U+10D32 +U+10D33 +U+10D34 +U+10D35 +U+10D36 +U+10D37 +U+10D38 +U+10D39 +U+11066 +U+11067 +U+11068 +U+11069 +U+1106A +U+1106B +U+1106C +U+1106D +U+1106E +U+1106F +U+110F0 +U+110F1 +U+110F2 +U+110F3 +U+110F4 +U+110F5 +U+110F6 +U+110F7 +U+110F8 +U+110F9 +U+11136 +U+11137 +U+11138 +U+11139 +U+1113A +U+1113B +U+1113C +U+1113D +U+1113E +U+1113F +U+111D0 +U+111D1 +U+111D2 +U+111D3 +U+111D4 +U+111D5 +U+111D6 +U+111D7 +U+111D8 +U+111D9 +U+112F0 +U+112F1 +U+112F2 +U+112F3 +U+112F4 +U+112F5 +U+112F6 +U+112F7 +U+112F8 +U+112F9 +U+11450 +U+11451 +U+11452 +U+11453 +U+11454 +U+11455 +U+11456 +U+11457 +U+11458 +U+11459 +U+114D0 +U+114D1 +U+114D2 +U+114D3 +U+114D4 +U+114D5 +U+114D6 +U+114D7 +U+114D8 +U+114D9 +U+11650 +U+11651 +U+11652 +U+11653 +U+11654 +U+11655 +U+11656 +U+11657 +U+11658 +U+11659 +U+116C0 +U+116C1 +U+116C2 +U+116C3 +U+116C4 +U+116C5 +U+116C6 +U+116C7 +U+116C8 +U+116C9 +U+11730 +U+11731 +U+11732 +U+11733 +U+11734 +U+11735 +U+11736 +U+11737 +U+11738 +U+11739 +U+118E0 +U+118E1 +U+118E2 +U+118E3 +U+118E4 +U+118E5 +U+118E6 +U+118E7 +U+118E8 +U+118E9 +U+11950 +U+11951 +U+11952 +U+11953 +U+11954 +U+11955 +U+11956 +U+11957 +U+11958 +U+11959 +U+11C50 +U+11C51 +U+11C52 +U+11C53 +U+11C54 +U+11C55 +U+11C56 +U+11C57 +U+11C58 +U+11C59 
+U+11D50 +U+11D51 +U+11D52 +U+11D53 +U+11D54 +U+11D55 +U+11D56 +U+11D57 +U+11D58 +U+11D59 +U+11DA0 +U+11DA1 +U+11DA2 +U+11DA3 +U+11DA4 +U+11DA5 +U+11DA6 +U+11DA7 +U+11DA8 +U+11DA9 +U+16A60 +U+16A61 +U+16A62 +U+16A63 +U+16A64 +U+16A65 +U+16A66 +U+16A67 +U+16A68 +U+16A69 +U+16AC0 +U+16AC1 +U+16AC2 +U+16AC3 +U+16AC4 +U+16AC5 +U+16AC6 +U+16AC7 +U+16AC8 +U+16AC9 +U+16B50 +U+16B51 +U+16B52 +U+16B53 +U+16B54 +U+16B55 +U+16B56 +U+16B57 +U+16B58 +U+16B59 +U+1D7CE +U+1D7CF +U+1D7D0 +U+1D7D1 +U+1D7D2 +U+1D7D3 +U+1D7D4 +U+1D7D5 +U+1D7D6 +U+1D7D7 +U+1D7D8 +U+1D7D9 +U+1D7DA +U+1D7DB +U+1D7DC +U+1D7DD +U+1D7DE +U+1D7DF +U+1D7E0 +U+1D7E1 +U+1D7E2 +U+1D7E3 +U+1D7E4 +U+1D7E5 +U+1D7E6 +U+1D7E7 +U+1D7E8 +U+1D7E9 +U+1D7EA +U+1D7EB +U+1D7EC +U+1D7ED +U+1D7EE +U+1D7EF +U+1D7F0 +U+1D7F1 +U+1D7F2 +U+1D7F3 +U+1D7F4 +U+1D7F5 +U+1D7F6 +U+1D7F7 +U+1D7F8 +U+1D7F9 +U+1D7FA +U+1D7FB +U+1D7FC +U+1D7FD +U+1D7FE +U+1D7FF +U+1E140 +U+1E141 +U+1E142 +U+1E143 +U+1E144 +U+1E145 +U+1E146 +U+1E147 +U+1E148 +U+1E149 +U+1E2F0 +U+1E2F1 +U+1E2F2 +U+1E2F3 +U+1E2F4 +U+1E2F5 +U+1E2F6 +U+1E2F7 +U+1E2F8 +U+1E2F9 +U+1E950 +U+1E951 +U+1E952 +U+1E953 +U+1E954 +U+1E955 +U+1E956 +U+1E957 +U+1E958 +U+1E959 +U+1FBF0 +U+1FBF1 +U+1FBF2 +U+1FBF3 +U+1FBF4 +U+1FBF5 +U+1FBF6 +U+1FBF7 +U+1FBF8 +U+1FBF9 diff --git a/level_0/f_utf/data/tests/codepoints/digit-all.txt b/level_0/f_utf/data/tests/codepoints/digit-all.txt index 4f954bd..5923866 100644 --- a/level_0/f_utf/data/tests/codepoints/digit-all.txt +++ b/level_0/f_utf/data/tests/codepoints/digit-all.txt @@ -1,183 +1,414 @@ -U+0030 -U+0031 -U+0032 -U+0033 -U+0034 -U+0035 -U+0036 -U+0037 -U+0038 -U+0039 -U+0660 -U+0661 -U+0662 -U+0663 -U+0664 -U+0665 -U+0666 -U+0667 -U+0668 -U+0669 -U+06F0 -U+06F1 -U+06F2 -U+06F3 -U+06F4 -U+06F5 -U+06F6 -U+06F7 -U+06F8 -U+06F9 -U+07C0 -U+07C1 -U+07C2 -U+07C3 -U+07C4 -U+07C5 -U+07C6 -U+07C7 -U+07C8 -U+07C9 -U+0966 -U+0967 -U+0968 -U+0969 -U+096A -U+096B -U+096C -U+096D -U+096E -U+096F -U+09E6 -U+09E7 -U+09E8 -U+09E9 -U+09EA -U+09EB -U+09EC 
-U+09ED -U+09EE -U+09EF -U+0A66 -U+0A67 -U+0A68 -U+0A69 -U+0A6A -U+0A6B -U+0A6C -U+0A6D -U+0A6E -U+0A6F -U+0AE6 -U+0AE7 -U+0AE8 -U+0AE9 -U+0AEA -U+0AEB -U+0AEC -U+0AED -U+0AEE -U+0AEF -U+0B66 -U+0B67 -U+0B68 -U+0B69 -U+0B6A -U+0B6B -U+0B6C -U+0B6D -U+0B6E -U+0B6F -U+0BE6 -U+0BE7 -U+0BE8 -U+0BE9 -U+0BEA -U+0BEB -U+0BEC -U+0BED -U+0BEE -U+0BEF -U+0C66 -U+0C67 -U+0C68 -U+0C69 -U+0C6A -U+0C6B -U+0C6C -U+0C6D -U+0C6E -U+0C6F -U+0CE6 -U+0CE7 -U+0CE8 -U+0CE9 -U+0CEA -U+0CEB -U+0CEC -U+0CED -U+0CEE -U+0CEF -U+0D66 -U+0D67 -U+0D68 -U+0D69 -U+0D6A -U+0D6B -U+0D6C -U+0D6D -U+0D6E -U+0D6F -U+0DE6 -U+0DE7 -U+0DE8 -U+0DE9 -U+0DEA -U+0DEB -U+0DEC -U+0DED -U+0DEE -U+0DEF -U+0E50 -U+0E51 -U+0E52 -U+0E53 -U+0E54 -U+0E55 -U+0E56 -U+0E57 -U+0E58 -U+0E59 -U+0ED0 -U+0ED1 -U+0ED2 -U+0ED3 -U+0ED4 -U+0ED5 -U+0ED6 -U+0ED7 -U+0ED8 -U+0ED9 -U+0F20 -U+0F21 -U+0F22 -U+0F23 -U+0F24 -U+0F25 -U+0F26 -U+0F27 -U+0F28 -U+0F29 -U+1040 -U+1041 -U+1042 -U+1043 -U+1044 -U+1045 -U+1046 -U+1047 -U+1048 -U+1049 +U+0030 +U+0031 +U+0032 +U+0033 +U+0034 +U+0035 +U+0036 +U+0037 +U+0038 +U+0039 +U+0660 +U+0661 +U+0662 +U+0663 +U+0664 +U+0665 +U+0666 +U+0667 +U+0668 +U+0669 +U+06F0 +U+06F1 +U+06F2 +U+06F3 +U+06F4 +U+06F5 +U+06F6 +U+06F7 +U+06F8 +U+06F9 +U+07C0 +U+07C1 +U+07C2 +U+07C3 +U+07C4 +U+07C5 +U+07C6 +U+07C7 +U+07C8 +U+07C9 +U+0966 +U+0967 +U+0968 +U+0969 +U+096A +U+096B +U+096C +U+096D +U+096E +U+096F +U+09E6 +U+09E7 +U+09E8 +U+09E9 +U+09EA +U+09EB +U+09EC +U+09ED +U+09EE +U+09EF +U+0A66 +U+0A67 +U+0A68 +U+0A69 +U+0A6A +U+0A6B +U+0A6C +U+0A6D +U+0A6E +U+0A6F +U+0AE6 +U+0AE7 +U+0AE8 +U+0AE9 +U+0AEA +U+0AEB +U+0AEC +U+0AED +U+0AEE +U+0AEF +U+0B66 +U+0B67 +U+0B68 +U+0B69 +U+0B6A +U+0B6B +U+0B6C +U+0B6D +U+0B6E +U+0B6F +U+0BE6 +U+0BE7 +U+0BE8 +U+0BE9 +U+0BEA +U+0BEB +U+0BEC +U+0BED +U+0BEE +U+0BEF +U+0C66 +U+0C67 +U+0C68 +U+0C69 +U+0C6A +U+0C6B +U+0C6C +U+0C6D +U+0C6E +U+0C6F +U+0CE6 +U+0CE7 +U+0CE8 +U+0CE9 +U+0CEA +U+0CEB +U+0CEC +U+0CED +U+0CEE +U+0CEF +U+0D66 +U+0D67 +U+0D68 +U+0D69 +U+0D6A +U+0D6B +U+0D6C 
+U+0D6D +U+0D6E +U+0D6F +U+0DE6 +U+0DE7 +U+0DE8 +U+0DE9 +U+0DEA +U+0DEB +U+0DEC +U+0DED +U+0DEE +U+0DEF +U+0E50 +U+0E51 +U+0E52 +U+0E53 +U+0E54 +U+0E55 +U+0E56 +U+0E57 +U+0E58 +U+0E59 +U+0ED0 +U+0ED1 +U+0ED2 +U+0ED3 +U+0ED4 +U+0ED5 +U+0ED6 +U+0ED7 +U+0ED8 +U+0ED9 +U+0F20 +U+0F21 +U+0F22 +U+0F23 +U+0F24 +U+0F25 +U+0F26 +U+0F27 +U+0F28 +U+0F29 +U+1040 +U+1041 +U+1042 +U+1043 +U+1044 +U+1045 +U+1046 +U+1047 +U+1048 +U+1049 +U+1090 +U+1091 +U+1092 +U+1093 +U+1094 +U+1095 +U+1096 +U+1097 +U+1098 +U+1099 +U+17E0 +U+17E1 +U+17E2 +U+17E3 +U+17E4 +U+17E5 +U+17E6 +U+17E7 +U+17E8 +U+17E9 +U+1810 +U+1811 +U+1812 +U+1813 +U+1814 +U+1815 +U+1816 +U+1817 +U+1818 +U+1819 +U+1946 +U+1947 +U+1948 +U+1949 +U+194A +U+194B +U+194C +U+194D +U+194E +U+194F +U+19D0 +U+19D1 +U+19D2 +U+19D3 +U+19D4 +U+19D5 +U+19D6 +U+19D7 +U+19D8 +U+19D9 +U+1A80 +U+1A81 +U+1A82 +U+1A83 +U+1A84 +U+1A85 +U+1A86 +U+1A87 +U+1A88 +U+1A89 +U+1A90 +U+1A91 +U+1A92 +U+1A93 +U+1A94 +U+1A95 +U+1A96 +U+1A97 +U+1A98 +U+1A99 +U+1B50 +U+1B51 +U+1B52 +U+1B53 +U+1B54 +U+1B55 +U+1B56 +U+1B57 +U+1B58 +U+1B59 +U+1BB0 +U+1BB1 +U+1BB2 +U+1BB3 +U+1BB4 +U+1BB5 +U+1BB6 +U+1BB7 +U+1BB8 +U+1BB9 +U+1C40 +U+1C41 +U+1C42 +U+1C43 +U+1C44 +U+1C45 +U+1C46 +U+1C47 +U+1C48 +U+1C49 +U+1C50 +U+1C51 +U+1C52 +U+1C53 +U+1C54 +U+1C55 +U+1C56 +U+1C57 +U+1C58 +U+1C59 +U+2160 +U+2161 +U+2162 +U+2163 +U+2164 +U+2165 +U+2166 +U+2167 +U+2168 +U+2169 +U+216A +U+216B +U+216C +U+216D +U+216E +U+216F +U+2170 +U+2171 +U+2172 +U+2173 +U+2174 +U+2175 +U+2176 +U+2177 +U+2178 +U+2179 +U+217A +U+217B +U+217C +U+217D +U+217E +U+217F +U+2180 +U+2181 +U+2182 +U+2183 +U+2184 +U+2185 +U+2186 +U+2187 +U+2188 +U+A620 +U+A621 +U+A622 +U+A623 +U+A624 +U+A625 +U+A626 +U+A627 +U+A628 +U+A629 +U+A8D0 +U+A8D1 +U+A8D2 +U+A8D3 +U+A8D4 +U+A8D5 +U+A8D6 +U+A8D7 +U+A8D8 +U+A8D9 +U+A900 +U+A901 +U+A902 +U+A903 +U+A904 +U+A905 +U+A906 +U+A907 +U+A908 +U+A909 +U+A9D0 +U+A9D1 +U+A9D2 +U+A9D3 +U+A9D4 +U+A9D5 +U+A9D6 +U+A9D7 +U+A9D8 +U+A9D9 +U+A9F0 +U+A9F1 +U+A9F2 +U+A9F3 +U+A9F4 +U+A9F5 
+U+A9F6 +U+A9F7 +U+A9F8 +U+A9F9 +U+AA50 +U+AA51 +U+AA52 +U+AA53 +U+AA54 +U+AA55 +U+AA56 +U+AA57 +U+AA58 +U+AA59 +U+ABF0 +U+ABF1 +U+ABF2 +U+ABF3 +U+ABF4 +U+ABF5 +U+ABF6 +U+ABF7 +U+ABF8 +U+ABF9 +U+FF10 +U+FF11 +U+FF12 +U+FF13 +U+FF14 +U+FF15 +U+FF16 +U+FF17 +U+FF18 +U+FF19 U+104A0 U+104A1 U+104A2 @@ -188,16 +419,6 @@ U+104A6 U+104A7 U+104A8 U+104A9 -U+1090 -U+1091 -U+1092 -U+1093 -U+1094 -U+1095 -U+1096 -U+1097 -U+1098 -U+1099 U+10D30 U+10D31 U+10D32 @@ -368,6 +589,16 @@ U+16A66 U+16A67 U+16A68 U+16A69 +U+16AC0 +U+16AC1 +U+16AC2 +U+16AC3 +U+16AC4 +U+16AC5 +U+16AC6 +U+16AC7 +U+16AC8 +U+16AC9 U+16B50 U+16B51 U+16B52 @@ -378,106 +609,6 @@ U+16B56 U+16B57 U+16B58 U+16B59 -U+17E0 -U+17E1 -U+17E2 -U+17E3 -U+17E4 -U+17E5 -U+17E6 -U+17E7 -U+17E8 -U+17E9 -U+1810 -U+1811 -U+1812 -U+1813 -U+1814 -U+1815 -U+1816 -U+1817 -U+1818 -U+1819 -U+1946 -U+1947 -U+1948 -U+1949 -U+194A -U+194B -U+194C -U+194D -U+194E -U+194F -U+19D0 -U+19D1 -U+19D2 -U+19D3 -U+19D4 -U+19D5 -U+19D6 -U+19D7 -U+19D8 -U+19D9 -U+1A80 -U+1A81 -U+1A82 -U+1A83 -U+1A84 -U+1A85 -U+1A86 -U+1A87 -U+1A88 -U+1A89 -U+1A90 -U+1A91 -U+1A92 -U+1A93 -U+1A94 -U+1A95 -U+1A96 -U+1A97 -U+1A98 -U+1A99 -U+1B50 -U+1B51 -U+1B52 -U+1B53 -U+1B54 -U+1B55 -U+1B56 -U+1B57 -U+1B58 -U+1B59 -U+1BB0 -U+1BB1 -U+1BB2 -U+1BB3 -U+1BB4 -U+1BB5 -U+1BB6 -U+1BB7 -U+1BB8 -U+1BB9 -U+1C40 -U+1C41 -U+1C42 -U+1C43 -U+1C44 -U+1C45 -U+1C46 -U+1C47 -U+1C48 -U+1C49 -U+1C50 -U+1C51 -U+1C52 -U+1C53 -U+1C54 -U+1C55 -U+1C56 -U+1C57 -U+1C58 -U+1C59 U+1D7CE U+1D7CF U+1D7D0 @@ -568,83 +699,3 @@ U+1FBF6 U+1FBF7 U+1FBF8 U+1FBF9 -U+A620 -U+A621 -U+A622 -U+A623 -U+A624 -U+A625 -U+A626 -U+A627 -U+A628 -U+A629 -U+A8D0 -U+A8D1 -U+A8D2 -U+A8D3 -U+A8D4 -U+A8D5 -U+A8D6 -U+A8D7 -U+A8D8 -U+A8D9 -U+A900 -U+A901 -U+A902 -U+A903 -U+A904 -U+A905 -U+A906 -U+A907 -U+A908 -U+A909 -U+A9D0 -U+A9D1 -U+A9D2 -U+A9D3 -U+A9D4 -U+A9D5 -U+A9D6 -U+A9D7 -U+A9D8 -U+A9D9 -U+A9F0 -U+A9F1 -U+A9F2 -U+A9F3 -U+A9F4 -U+A9F5 -U+A9F6 -U+A9F7 -U+A9F8 -U+A9F9 -U+AA50 -U+AA51 -U+AA52 -U+AA53 -U+AA54 -U+AA55 
-U+AA56 -U+AA57 -U+AA58 -U+AA59 -U+ABF0 -U+ABF1 -U+ABF2 -U+ABF3 -U+ABF4 -U+ABF5 -U+ABF6 -U+ABF7 -U+ABF8 -U+ABF9 -U+FF10 -U+FF11 -U+FF12 -U+FF13 -U+FF14 -U+FF15 -U+FF16 -U+FF17 -U+FF18 -U+FF19 diff --git a/level_0/f_utf/data/tests/values/decimal-all.txt b/level_0/f_utf/data/tests/values/decimal-all.txt new file mode 100644 index 0000000..c647e79 --- /dev/null +++ b/level_0/f_utf/data/tests/values/decimal-all.txt @@ -0,0 +1,701 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +50 +100 +500 +1000 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +50 +100 +500 +1000 +1000 +5000 +10000 +100 +100 +6 +50 +50000 +100000 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 
+6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/level_0/f_utf/tests/unit/c/data-utf.c b/level_0/f_utf/tests/unit/c/data-utf.c index 0f83782..88755bd 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.c +++ b/level_0/f_utf/tests/unit/c/data-utf.c @@ -19,6 +19,11 @@ FILE *data__bytesequence_file_open__control(void) { return fopen("./data/tests/bytesequences/control-all.txt", "r"); } +FILE *data__bytesequence_file_open__decimal(void) { + + return fopen("./data/tests/bytesequences/decimal-all.txt", "r"); +} + FILE *data__bytesequence_file_open__digit(void) { return fopen("./data/tests/bytesequences/digit-all.txt", "r"); @@ -132,6 +137,29 @@ ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const char return bytes; } +FILE *data__value_file_open__decimal(void) { + + return fopen("./data/tests/values/decimal-all.txt", "r"); +} + +ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value) { + + size_t length = 0; + char *line = 0; + + const ssize_t bytes = getline(&line, &length, file); + + if (bytes > 0) { + *value = (uint32_t) atol(line); + } + + if (line) { + free(line); + } + + return bytes; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_0/f_utf/tests/unit/c/data-utf.h b/level_0/f_utf/tests/unit/c/data-utf.h index 31b7c50..b1ee533 100644 --- a/level_0/f_utf/tests/unit/c/data-utf.h +++ b/level_0/f_utf/tests/unit/c/data-utf.h @@ -68,6 +68,21 @@ extern FILE 
*data__bytesequence_file_open__combining(void); extern FILE *data__bytesequence_file_open__control(void); /** + * Open the "decimal" bytesequence file. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/bytesequences/decimal-all.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__bytesequence_file_open__decimal(void); + +/** * Open the "digit" bytesequence file. * * This assumes the following: @@ -286,7 +301,7 @@ extern FILE *data__bytesequence_file_open__zero_width(void); * * This should handle converting the number between big and little endian as needed. * - * The input file is expected to be in base-10 so that existing standarrd functions like atoll() can be easily used. + * The input file is expected to be in base-10 so that existing standard functions like atoll() can be easily used. * * @param file * The file stream. @@ -300,10 +315,47 @@ extern FILE *data__bytesequence_file_open__zero_width(void); * * @see atoll() * @see getline() - * @see htonl() */ extern ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character); +/** + * Open the "decimal" values file. + * + * This assumes the following: + * - The file path is relative to the current working directory (tests are run from project root). + * - The file path is "data/tests/values/decimal-all.txt". + * + * @return + * Non-zero on success. + * 0 on failure. + * + * @see fopen() + */ +extern FILE *data__value_file_open__decimal(void); + +/** + * Simple line reader that converts the line into an unsigned 32-bit number. + * + * This assumes the following: + * - The line only contains base-10 digits as ASCII characters. + * + * The input file is expected to be in base-10 so that existing standard functions like atol() can be easily used. + * + * @param file + * The file stream.
+ * @param value + * The number read from the file at the current line in the stream. + * + * @return + * positive number on success where number represents bytes read. + * -1 on failure or when end of file is reached. + * (getline() signals end of file with -1, so 0 is never returned.) + * + * @see atol() + * @see getline() + */ +extern ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value); + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c new file mode 100644 index 0000000..8c2efed --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c @@ -0,0 +1,47 @@ +#include "test-utf.h" +#include "test-utf-character_is_decimal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_character_is_decimal__works(void **state) { + + { + FILE *file = data__bytesequence_file_open__decimal(); + FILE *file_number = data__value_file_open__decimal(); + + assert_non_null(file); + assert_non_null(file_number); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + ssize_t bytes_number = 0; + uint32_t number = 0; + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + bytes_number = data__value_get_line_long_long(file_number, &number); + + if (bytes > 0 && bytes_number > 0) { + uint32_t value = F_type_size_max_32_unsigned_d; + + const f_status_t status = f_utf_character_is_decimal(sequence, &value); + + assert_int_equal(status, F_true); + assert_int_equal(value, number); + } + + ++line; + + } while (bytes > 0 && bytes_number > 0); + + fclose(file); + fclose(file_number); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h new file mode 100644 index 0000000..326307e --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 +
* + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. + */ +#ifndef _TEST__F_utf_character_is_decimal_h +#define _TEST__F_utf_character_is_decimal_h + +/** + * Test that the function works. + * + * @see f_utf_character_is_decimal() + */ +extern void test__f_utf_character_is_decimal__works(void **state); + +#endif // _TEST__F_utf_character_is_decimal_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c index c96ec84..dd00bfb 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c @@ -20,7 +20,7 @@ void test__f_utf_is_alphabetic__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c index 9173f10..0c0a63a 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c @@ -20,7 +20,7 @@ void test__f_utf_is_combining__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_control.c b/level_0/f_utf/tests/unit/c/test-utf-is_control.c index 5d8189e..43087b8 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_control.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_control.c @@ -20,7 +20,7 @@ void test__f_utf_is_control__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git 
a/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c new file mode 100644 index 0000000..53e1ba0 --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c @@ -0,0 +1,64 @@ +#include "test-utf.h" +#include "test-utf-is_decimal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void test__f_utf_is_decimal__works(void **state) { + + { + FILE *file = data__bytesequence_file_open__decimal(); + FILE *file_number = data__value_file_open__decimal(); + + assert_non_null(file); + assert_non_null(file_number); + + f_utf_char_t sequence = 0; + ssize_t bytes = 0; + ssize_t bytes_number = 0; + uint32_t number = 0; + f_array_length_t line = 0; + + do { + bytes = data__bytesequence_get_line(file, &sequence); + bytes_number = data__value_get_line_long_long(file_number, &number); + + if (bytes > 0 && bytes_number > 0) { + const uint8_t width = macro_f_utf_char_t_width(sequence); + char buffer[5] = { 0, 0, 0, 0, 0 }; + + buffer[0] = macro_f_utf_char_t_to_char_1(sequence); + + if (width > 1) { + buffer[1] = macro_f_utf_char_t_to_char_2(sequence); + + if (width > 2) { + buffer[2] = macro_f_utf_char_t_to_char_3(sequence); + + if (width > 3) { + buffer[3] = macro_f_utf_char_t_to_char_4(sequence); + } + } + } + + uint32_t value = F_type_size_max_32_unsigned_d; + + const f_status_t status = f_utf_is_decimal(buffer, 5, &value); + + assert_int_equal(status, F_true); + assert_int_equal(value, number); + } + + ++line; + + } while (bytes > 0 && bytes_number > 0); + + fclose(file); + fclose(file_number); + } +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h new file mode 100644 index 0000000..3d795ee --- /dev/null +++ b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h @@ -0,0 +1,20 @@ +/** + * FLL - Level 0 + * + * Project: UTF + * API Version: 0.5 + * Licenses: lgpl-2.1-or-later + * + * Test the function in the utf project. 
+ */ +#ifndef _TEST__F_utf_is_decimal_h +#define _TEST__F_utf_is_decimal_h + +/** + * Test that the function works. + * + * @see f_utf_is_decimal() + */ +extern void test__f_utf_is_decimal__works(void **state); + +#endif // _TEST__F_utf_is_decimal_h diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c index 5db8d58..37683e8 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c @@ -20,7 +20,7 @@ void test__f_utf_is_digit__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c index 054eaf4..014d350 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c @@ -20,7 +20,7 @@ void test__f_utf_is_emoji__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c index 5801e81..c397486 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c @@ -20,7 +20,7 @@ void test__f_utf_is_numeric__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c index 88902b2..4e2b3aa 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c +++ 
b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c @@ -20,7 +20,7 @@ void test__f_utf_is_phonetic__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_private.c b/level_0/f_utf/tests/unit/c/test-utf-is_private.c index b5fdbef..e9d5f52 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_private.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_private.c @@ -20,7 +20,7 @@ void test__f_utf_is_private__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c index 54166b2..ca7f9e2 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c @@ -20,7 +20,7 @@ void test__f_utf_is_punctuation__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c index e04d93f..4924931 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c @@ -20,7 +20,7 @@ void test__f_utf_is_subscript__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c index 
2fec11b..9180f95 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c @@ -20,7 +20,7 @@ void test__f_utf_is_superscript__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c index cac93b6..0646b0b 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c @@ -20,7 +20,7 @@ void test__f_utf_is_symbol__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c index c449727..eede603 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c @@ -20,7 +20,7 @@ void test__f_utf_is_whitespace__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c index d2efc6f..ddab706 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c @@ -20,7 +20,7 @@ void test__f_utf_is_wide__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word.c 
b/level_0/f_utf/tests/unit/c/test-utf-is_word.c index f9d5e0b..14bef6a 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_word.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_word.c @@ -20,7 +20,7 @@ void test__f_utf_is_word__strict_is_false(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; @@ -67,7 +67,7 @@ void test__f_utf_is_word__strict_is_true(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c index 00f6329..a3c183e 100644 --- a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c +++ b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c @@ -20,7 +20,7 @@ void test__f_utf_is_zero_width__works(void **state) { do { bytes = data__bytesequence_get_line(file, &sequence); - if (bytes) { + if (bytes > 0) { const uint8_t width = macro_f_utf_char_t_width(sequence); char buffer[5] = { 0, 0, 0, 0, 0 }; diff --git a/level_0/f_utf/tests/unit/c/test-utf.c b/level_0/f_utf/tests/unit/c/test-utf.c index 3661bdb..b5e8079 100644 --- a/level_0/f_utf/tests/unit/c/test-utf.c +++ b/level_0/f_utf/tests/unit/c/test-utf.c @@ -25,8 +25,12 @@ int main(void) { cmocka_unit_test(test__f_utf_append_nulless__works), //cmocka_unit_test(test__f_utf_character_is_alphabetic__works), + //cmocka_unit_test(test__f_utf_character_is_alphabetic_decimal__works), + //cmocka_unit_test(test__f_utf_character_is_alphabetic_digit__works), + //cmocka_unit_test(test__f_utf_character_is_alphabetic_numeric__works), cmocka_unit_test(test__f_utf_character_is_combining__works), cmocka_unit_test(test__f_utf_character_is_control__works), + cmocka_unit_test(test__f_utf_character_is_decimal__works), 
cmocka_unit_test(test__f_utf_character_is_digit__works), cmocka_unit_test(test__f_utf_character_is_emoji__works), //cmocka_unit_test(test__f_utf_character_is_numeric__works), @@ -133,8 +137,12 @@ int main(void) { cmocka_unit_test(test__f_utf_dynamicss_resize__works), //cmocka_unit_test(test__f_utf_is_alphabetic__works), + //cmocka_unit_test(test__f_utf_is_alphabetic_decimal__works), + //cmocka_unit_test(test__f_utf_is_alphabetic_digit__works), + //cmocka_unit_test(test__f_utf_is_alphabetic_numeric__works), cmocka_unit_test(test__f_utf_is_combining__works), cmocka_unit_test(test__f_utf_is_control__works), + cmocka_unit_test(test__f_utf_is_decimal__works), cmocka_unit_test(test__f_utf_is_digit__works), cmocka_unit_test(test__f_utf_is_emoji__works), //cmocka_unit_test(test__f_utf_is_numeric__works), diff --git a/level_0/f_utf/tests/unit/c/test-utf.h b/level_0/f_utf/tests/unit/c/test-utf.h index 8deda57..a8069e9 100644 --- a/level_0/f_utf/tests/unit/c/test-utf.h +++ b/level_0/f_utf/tests/unit/c/test-utf.h @@ -36,6 +36,7 @@ #include "test-utf-character_is_alphabetic.h" #include "test-utf-character_is_combining.h" #include "test-utf-character_is_control.h" +#include "test-utf-character_is_decimal.h" #include "test-utf-character_is_digit.h" #include "test-utf-character_is_emoji.h" #include "test-utf-character_is_numeric.h" @@ -104,6 +105,7 @@ #include "test-utf-is_alphabetic.h" #include "test-utf-is_combining.h" #include "test-utf-is_control.h" +#include "test-utf-is_decimal.h" #include "test-utf-is_digit.h" #include "test-utf-is_emoji.h" #include "test-utf-is_numeric.h" diff --git a/level_3/controller/c/controller/private-controller.c b/level_3/controller/c/controller/private-controller.c index 36be1b1..ce50b9a 100644 --- a/level_3/controller/c/controller/private-controller.c +++ b/level_3/controller/c/controller/private-controller.c @@ -786,7 +786,7 @@ extern "C" { if (name.string[i] == '_') continue; - status = f_utf_is_alphabetic_digit(name.string, name.used); + 
status = f_utf_is_alphabetic_decimal(name.string, name.used); if (F_status_is_error(status)) return status; if (status == F_false) return F_false; diff --git a/level_3/controller/c/controller/private-controller.h b/level_3/controller/c/controller/private-controller.h index a853151..618b113 100644 --- a/level_3/controller/c/controller/private-controller.h +++ b/level_3/controller/c/controller/private-controller.h @@ -381,10 +381,10 @@ extern "C" { * F_none if there is no string to validate (used = 0). * * Errors (with error bit) from: f_utf_is_alphabetic(). - * Errors (with error bit) from: f_utf_is_alphabetic_digit(). + * Errors (with error bit) from: f_utf_is_alphabetic_decimal(). * * @see f_utf_is_alphabetic() - * @see f_utf_is_alphabetic_digit() + * @see f_utf_is_alphabetic_decimal() */ #ifndef _di_controller_validate_define_name_ extern f_status_t controller_validate_environment_name(const f_string_static_t name) F_attribute_visibility_internal_d;