Redesign the digit and decimal behavior.
The "is digit" functions now refer to base-10 but do not attempt to return the identified digit.
The "is decimal" functions now refer to base-10 and support providing the identified digit.
The "is decimal" functions also support base units other than just base-10.
The alphabetic digit/numeric functions now also have an alphabetic decimal function.
Clean up more places in the code using "sequence" rather than "character" or "characters".
Functions like f_utf_character_is_alpha_digit() are now like f_utf_character_is_alphabetic_digit().
Add related unit tests.
The "is digit" functions have unit tests that test whether the digit returned is correct.
I have not reviewed all of the "numeric" Unicode digits to confirm/deny that my is decimal functions are complete.
I observed what looks like bugs in the alphabetic functions.
In these cases the final return statement is returning F_false when they instead should be returning F_true.
There are minor corrections in documentation.
build_sources_library string.c private-string.c string/common.c string/dynamic.c string/map.c string/map_multi.c string/private-dynamic.c string/private-map.c string/private-map_multi.c string/private-quantity.c string/private-range.c string/private-triple.c string/quantity.c string/range.c string/static.c string/triple.c
build_sources_library type_array/array_length.c type_array/cell.c type_array/fll_id.c type_array/int8.c type_array/int16.c type_array/int32.c type_array/int64.c type_array/int128.c type_array/state.c type_array/status.c type_array/uint8.c type_array/uint16.c type_array/uint32.c type_array/uint64.c type_array/uint128.c
build_sources_library type_array/private-array_length.c type_array/private-cell.c type_array/private-fll_id.c type_array/private-int8.c type_array/private-int16.c type_array/private-int32.c type_array/private-int64.c type_array/private-int128.c type_array/private-state.c type_array/private-status.c type_array/private-uint8.c type_array/private-uint16.c type_array/private-uint32.c type_array/private-uint64.c type_array/private-uint128.c
-build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
+build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-triple.c utf/private-is_unassigned.c utf/private-string.c utf/static.c utf/string.c utf/triple.c
build_sources_library-level thread.c private-thread.c
build_sources_library_shared
build_sources_library level_0/string.c level_0/private-string.c level_0/string/common.c level_0/string/dynamic.c level_0/string/map.c level_0/string/map_multi.c level_0/string/private-dynamic.c level_0/string/private-map.c level_0/string/private-map_multi.c level_0/string/private-quantity.c level_0/string/private-range.c level_0/string/private-triple.c level_0/string/quantity.c level_0/string/range.c level_0/string/static.c level_0/string/triple.c
build_sources_library level_0/type_array/array_length.c level_0/type_array/cell.c level_0/type_array/fll_id.c level_0/type_array/int8.c level_0/type_array/int16.c level_0/type_array/int32.c level_0/type_array/int64.c level_0/type_array/int128.c level_0/type_array/state.c level_0/type_array/status.c level_0/type_array/uint8.c level_0/type_array/uint16.c level_0/type_array/uint32.c level_0/type_array/uint64.c level_0/type_array/uint128.c
build_sources_library level_0/type_array/private-array_length.c level_0/type_array/private-cell.c level_0/type_array/private-fll_id.c level_0/type_array/private-int8.c level_0/type_array/private-int16.c level_0/type_array/private-int32.c level_0/type_array/private-int64.c level_0/type_array/private-int128.c level_0/type_array/private-state.c level_0/type_array/private-status.c level_0/type_array/private-uint8.c level_0/type_array/private-uint16.c level_0/type_array/private-uint32.c level_0/type_array/private-uint64.c level_0/type_array/private-uint128.c
-build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c
+build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_decimal.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c
build_sources_library level_0/utf/common.c level_0/utf/convert.c level_0/utf/dynamic.c level_0/utf/is.c level_0/utf/is_character.c level_0/utf/map.c level_0/utf/map_multi.c level_0/utf/static.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-dynamic.c level_0/utf/private-is_unassigned.c level_0/utf/private-map.c level_0/utf/private-map_multi.c level_0/utf/private-triple.c level_0/utf/private-string.c
build_sources_library level_1/control_group.c
extern "C" {
#endif
-#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
f_status_t private_f_utf_char_to_character(const f_string_t sequence, const f_array_length_t width_max, f_utf_char_t *character_utf) {
if (!macro_f_utf_byte_width_is(*sequence)) {
return F_none;
}
-#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
#if !defined(_di_f_utf_unicode_to_) || !defined(_di_f_utf_character_unicode_to_)
f_status_t private_f_utf_character_unicode_to(const f_utf_char_t sequence, uint32_t *codepoint) {
* @see f_utf_character_is_valid()
* @see f_utf_is_valid()
* @see f_utf_is_alphabetic()
- * @see f_utf_is_alphabetic_digit()
+ * @see f_utf_is_alphabetic_decimal()
* @see f_utf_is_alphabetic_numeric()
* @see f_utf_is_ascii()
* @see f_utf_is_combining()
* @see f_utf_is_control()
* @see f_utf_is_control_picture()
+ * @see f_utf_is_decimal()
* @see f_utf_is_digit()
* @see f_utf_is_emoji()
* @see f_utf_is_graph()
* @see f_utf_is_zero_width()
* @see f_utf_unicode_to()
*/
-#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
extern f_status_t private_f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_char_t *character_utf) F_attribute_visibility_internal_d;
-#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
/**
* Private implementation of f_utf_character_is_zero_width().
#include "private-utf_alphabetic.h"
#include "private-utf_combining.h"
#include "private-utf_control.h"
+#include "private-utf_decimal.h"
#include "private-utf_digit.h"
#include "private-utf_numeric.h"
#include "private-utf_phonetic.h"
}
#endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_)
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+ f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+
+   // A recognized decimal is accepted right away; value is handed to the decimal check, which may write through it.
+   if (private_f_utf_character_is_decimal(sequence, F_true, value)) {
+     return F_true;
+   }
+
+   // Every class tested below is neither alphabetic nor decimal.
+   // The tests are evaluated left to right and evaluation stops at the first one that matches.
+   // The is_control() handles both is_control_code() and is_control_format().
+   if (
+     private_f_utf_character_is_zero_width(sequence) ||
+     private_f_utf_character_is_control(sequence) ||
+     private_f_utf_character_is_control_picture(sequence) ||
+     private_f_utf_character_is_whitespace(sequence) ||
+     private_f_utf_character_is_whitespace_modifier(sequence) ||
+     private_f_utf_character_is_numeric(sequence) ||
+     private_f_utf_character_is_punctuation(sequence) ||
+     private_f_utf_character_is_symbol(sequence) ||
+     private_f_utf_character_is_phonetic(sequence)
+   ) {
+     return F_false;
+   }
+
+   // Whatever was not excluded above is treated as alphabetic.
+   return F_true;
+ }
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+
#if !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_)
f_status_t private_f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) {
return F_false;
}
- return F_false;
+ return F_true;
}
#endif // !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_)
return F_false;
}
- return F_false;
+ return F_true;
}
#endif // !defined(_di_f_utf_character_is_alphabetic_numeric_) || !defined(_di_f_utf_is_alphabetic_numeric_)
#endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_)
/**
+ * Private implementation of f_utf_character_is_alphabetic_decimal().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * This expects the character width to be of at least size 2.
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if a UTF-8 alphabetic or a decimal character.
+ * F_false if not a UTF-8 alphabetic nor a decimal character.
+ *
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see f_utf_character_is_alphabetic_decimal()
+ * @see f_utf_is_alphabetic_decimal()
+ */
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+ extern f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+
+/**
* Private implementation of f_utf_character_is_alphabetic_digit().
*
* Intended to be shared to each of the different implementation variations.
--- /dev/null
+#include "utf.h"
+#include "private-utf.h"
+#include "private-utf_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Inline helper shared by the decimal range checks to avoid repeating the same conversion.
+ *
+ * The offset of the sequence from the start of its range is read from the byte matching the sequence width.
+ * That offset has 0x30 added to it and the result is handed to private_f_utf_character_is_decimal_for_ascii().
+ *
+ * This does not handle non-decimal values (non-base-10).
+ *
+ * @param sequence
+ *   The character sequence to process.
+ * @param always
+ *   The always return F_true boolean.
+ *   This is passed along unchanged.
+ * @param start
+ *   The inclusive start of the range that the sequence belongs to.
+ *   The stop of the range is determined from this.
+ * @param value
+ *   The value to update, if non-NULL.
+ *
+ * @return
+ *   F_true when value is NULL.
+ *   Otherwise, the return value of private_f_utf_character_is_decimal_for_ascii(): F_true for valid digit in the requested range, F_false otherwise.
+ */
+static inline f_status_t private_inline_f_utf_character_handle_decimal(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) {
+
+  // Without a value to fill in there is nothing to convert.
+  if (!value) {
+    return F_true;
+  }
+
+  f_char_t digit = 0;
+
+  // The byte holding the offset from the range start depends on the width of the sequence.
+  switch (macro_f_utf_char_t_width(sequence)) {
+    case 2:
+      digit = (f_char_t) macro_f_utf_char_t_to_char_2(sequence - start);
+      break;
+
+    case 3:
+      digit = (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start);
+      break;
+
+    case 4:
+      digit = (f_char_t) macro_f_utf_char_t_to_char_4(sequence - start);
+      break;
+
+    default:
+
+      // Any other width leaves the offset at 0.
+      break;
+  }
+
+  // Shift the offset onto the ASCII digits, which begin at 0x30.
+  digit += 0x30;
+
+  return private_f_utf_character_is_decimal_for_ascii(digit, always, value);
+}
+
+/**
+ * Inline helper shared by the Roman Numeral range checks to avoid repeating the same conversion.
+ *
+ * The offset of the sequence from the start of its range is read from the third byte.
+ * That offset has 0x31 added to it, so that the range maps onto the base-10 integers from 1 to 9, and the result is handed to private_f_utf_character_is_decimal_for_ascii().
+ *
+ * This does not handle non-decimal values (non-base-10).
+ *
+ * @param sequence
+ *   The character sequence to process.
+ * @param always
+ *   The always return F_true boolean.
+ *   This is passed along unchanged.
+ * @param start
+ *   The inclusive start of the range that the sequence belongs to.
+ *   The stop of the range is determined from this.
+ * @param value
+ *   The value to update, if non-NULL.
+ *
+ * @return
+ *   F_true when value is NULL.
+ *   Otherwise, the return value of private_f_utf_character_is_decimal_for_ascii(): F_true for valid digit in the requested range, F_false otherwise.
+ */
+static inline f_status_t private_inline_f_utf_character_handle_roman_numeral(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) {
+
+  // Without a value to fill in there is nothing to convert.
+  if (!value) {
+    return F_true;
+  }
+
+  f_char_t digit = (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start);
+
+  // The first sequence of the range stands for one rather than zero, so shift by 0x31 instead of 0x30.
+  digit += 0x31;
+
+  return private_f_utf_character_is_decimal_for_ascii(digit, always, value);
+}
+
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+ f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) {
+
+ if (macro_f_utf_char_t_width_is(sequence) == 2) {
+
+ // Arabic: U+0660 to U+0669.
+ if (sequence >= 0xd9a00000 && sequence <= 0xd9a90000) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xd9a00000, value);
+ }
+
+ // Extended Arabic: U+06F0 to U+06F9.
+ if (sequence >= 0xdbb00000 && sequence <= 0xdbb90000) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdbb00000, value);
+ }
+
+ // NKo: U+07C0 to U+07C9.
+ if (sequence >= 0xdf800000 && sequence <= 0xdf890000) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdf800000, value);
+ }
+
+ return F_false;
+ }
+
+ if (macro_f_utf_char_t_width_is(sequence) == 3) {
+
+ if (macro_f_utf_char_t_to_char_1(sequence) == 0xe0) {
+
+ // Devanagari: U+0966 to U+096F.
+ if (sequence >= 0xe0a5a600 && sequence <= 0xe0a5af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a5a600, value);
+ }
+
+ // Bengali: U+09E6 to U+09EF.
+ if (sequence >= 0xe0a7a600 && sequence <= 0xe0a7af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a7a600, value);
+ }
+
+ // Gurmukhi: U+0A66 to U+0A6F.
+ if (sequence >= 0xe0a9a600 && sequence <= 0xe0a9af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a9a600, value);
+ }
+
+ // Gujarati: U+0AE6 to U+0AEF.
+ if (sequence >= 0xe0aba600 && sequence <= 0xe0abaf00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0aba600, value);
+ }
+
+ // Oriya: U+0B66 to U+0B6F.
+ if (sequence >= 0xe0ada600 && sequence <= 0xe0adaf00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0ada600, value);
+ }
+
+ // Tamil: U+0BE6 to U+0BEF.
+ if (sequence >= 0xe0afa600 && sequence <= 0xe0afaf00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0afa600, value);
+ }
+
+ // Telugu: U+0C66 to U+0C6F.
+ if (sequence >= 0xe0b1a600 && sequence <= 0xe0b1af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b1a600, value);
+ }
+
+ // Kannada: U+0CE6 to U+0CEF.
+ if (sequence >= 0xe0b3a600 && sequence <= 0xe0b3af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b3a600, value);
+ }
+
+ // Malayalam: U+0D66 to U+0D6F.
+ if (sequence >= 0xe0b5a600 && sequence <= 0xe0b5af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b5a600, value);
+ }
+
+ // Sinhala: U+0DE6 to U+0DEF.
+ if (sequence >= 0xe0b7a600 && sequence <= 0xe0b7af00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b7a600, value);
+ }
+
+ // Thai: U+0E50 to U+0E59.
+ if (sequence >= 0xe0b99000 && sequence <= 0xe0b99900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b99000, value);
+ }
+
+ // Lao: U+0ED0 to U+0ED9.
+ if (sequence >= 0xe0bb9000 && sequence <= 0xe0bb9900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bb9000, value);
+ }
+
+ // Tibetan: U+0F20 to U+0F29.
+ if (sequence >= 0xe0bca000 && sequence <= 0xe0bca900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bca000, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe1) {
+
+ // Myanmar: U+1040 to U+1049.
+ if (sequence >= 0xe1818000 && sequence <= 0xe1818900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1818000, value);
+ }
+
+ // Myanmar (Shan): U+1090 to U+1099.
+ if (sequence >= 0xe1829000 && sequence <= 0xe1829900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1829000, value);
+ }
+
+ // Khmer: U+17E0 to U+17E9.
+ if (sequence >= 0xe19fa000 && sequence <= 0xe19fa900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe19fa000, value);
+ }
+
+ // Mongolian: U+1810 to U+1819.
+ if (sequence >= 0xe1a09000 && sequence <= 0xe1a09900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a09000, value);
+ }
+
+ // Limbu: U+1946 to U+194F.
+ if (sequence >= 0xe1a58600 && sequence <= 0xe1a58f00) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a58600, value);
+ }
+
+ // New Tai Lue: U+19D0 to U+19D9.
+ if (sequence >= 0xe1a79000 && sequence <= 0xe1a79900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a79000, value);
+ }
+
+ // Tai Tham (Hora): U+1A80 to U+1A89.
+ if (sequence >= 0xe1aa8000 && sequence <= 0xe1aa8900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa8000, value);
+ }
+
+ // Tai Tham (Tham): U+1A90 to U+1A99.
+ if (sequence >= 0xe1aa9000 && sequence <= 0xe1aa9900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa9000, value);
+ }
+
+ // Balinese: U+1B50 to U+1B59.
+ if (sequence >= 0xe1ad9000 && sequence <= 0xe1ad9900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1ad9000, value);
+ }
+
+ // Sundanese: U+1BB0 to U+1BB9.
+ if (sequence >= 0xe1aeb000 && sequence <= 0xe1aeb900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aeb000, value);
+ }
+
+ // Lepcha: U+1C40 to U+1C49.
+ if (sequence >= 0xe1b18000 && sequence <= 0xe1b18900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b18000, value);
+ }
+
+ // Ol Chiki: U+1C50 to U+1C59.
+ if (sequence >= 0xe1b19000 && sequence <= 0xe1b19900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b19000, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
+
+ // Number Forms (Roman Numerals): U+2160 to U+2188.
+ if (sequence >= 0xe285a000 && sequence <= 0xe2868800) {
+
+ // Roman Numerals (large) for 1-9: U+2160 to U+2168.
+ if (sequence >= 0xe285a000 && sequence <= 0xe285a800) {
+ return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285a000, value);
+ }
+
+ // Roman Numerals (small) for 1-9: U+2170 to U+2178.
+ if (sequence >= 0xe285b000 && sequence <= 0xe285b800) {
+ return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285b000, value);
+ }
+
+ // Roman Numeral (late form) for 6: U+2185.
+ if (!value || *value > 5) {
+ if (sequence == 0xe2868500) {
+ *value = 6;
+
+ return F_true;
+ }
+ }
+
+ if (value) {
+ if (*value > 9) {
+
+ // Roman Numeral: U+2169, U+2179.
+ if (sequence == 0xe285a900 || sequence == 0xe285b900) {
+ *value = 10;
+
+ return F_true;
+ }
+
+ if (*value > 10) {
+
+ // Roman Numeral: U+216A, U+217A.
+ if (sequence == 0xe285aa00 || sequence == 0xe285ba00) {
+ *value = 11;
+
+ return F_true;
+ }
+
+ if (*value > 11) {
+
+ // Roman Numeral: U+216B, U+217B.
+ if (sequence == 0xe285ab00 || sequence == 0xe285bb00) {
+ *value = 12;
+
+ return F_true;
+ }
+
+ // All remaining are out of the range 0-16 and value must be set to 0xffffffff (F_type_size_max_32_unsigned_d) for them to be processed.
+ if (*value == F_type_size_max_32_unsigned_d) {
+
+ // Roman Numeral: U+216C, U+217C, U+2186.
+ if (sequence == 0xe285ac00 || sequence == 0xe285bc00 || sequence == 0xe2868600) {
+ *value = 50;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+216D, U+217D, U+2183 (reversed, large), U+2184 (reversed, small).
+ if (sequence == 0xe285ad00 || sequence == 0xe285bd00 || sequence == 0xe2868300 || sequence == 0xe2868400) {
+ *value = 100;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+216E, U+217E.
+ if (sequence == 0xe285ae00 || sequence == 0xe285be00) {
+ *value = 500;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+216F, U+217F, U+2180 (1000 "CD").
+ if (sequence == 0xe285af00 || sequence == 0xe285bf00 || sequence == 0xe2868000) {
+ *value = 1000;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+2181.
+ if (sequence == 0xe2868100) {
+ *value = 5000;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+2182.
+ if (sequence == 0xe2868200) {
+ *value = 10000;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+2187.
+ if (sequence == 0xe2868700) {
+ *value = 50000;
+
+ return F_true;
+ }
+
+ // Roman Numeral: U+2188.
+ if (sequence == 0xe2868800) {
+ *value = 100000;
+
+ return F_true;
+ }
+ }
+ }
+ }
+ }
+
+ *value = F_type_size_max_32_unsigned_d;
+ }
+
+ if (always) {
+ return F_true;
+ }
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) {
+
+ // Vai: U+A620 to U+A629.
+ if (sequence >= 0xea98a000 && sequence <= 0xea98a900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xea98a000, value);
+ }
+
+ // Saurashtra: U+A8D0 to U+A8D9.
+ if (sequence >= 0xeaa39000 && sequence <= 0xeaa39900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa39000, value);
+ }
+
+ // Kayah Li: U+A900 to U+A909.
+ if (sequence >= 0xeaa48000 && sequence <= 0xeaa48900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa48000, value);
+ }
+
+ // Javanese: U+A9D0 to U+A9D9.
+ if (sequence >= 0xeaa79000 && sequence <= 0xeaa79900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa79000, value);
+ }
+
+ // Myanmar Extended-B: U+A9F0 to U+A9F9.
+ if (sequence >= 0xeaa7b000 && sequence <= 0xeaa7b900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa7b000, value);
+ }
+
+ // Cham: U+AA50 to U+AA59.
+ if (sequence >= 0xeaa99000 && sequence <= 0xeaa99900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa99000, value);
+ }
+
+ // Meetei Mayek: U+ABF0 to U+ABF9.
+ if (sequence >= 0xeaafb000 && sequence <= 0xeaafb900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaafb000, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xef) {
+
+ // Halfwidth and Fullwidth Forms: U+FF10 to U+FF19.
+ if (sequence >= 0xefbc9000 && sequence <= 0xefbc9900) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xefbc9000, value);
+ }
+ }
+
+ return F_false;
+ }
+
+ if (macro_f_utf_char_t_to_char_1(sequence) == 0xf0) {
+
+ if (macro_f_utf_char_t_to_char_2(sequence) == 0x90) {
+
+ // Osmanya: U+104A0 to U+104A9.
+ if (sequence >= 0xf09092a0 && sequence <= 0xf09092a9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09092a0, value);
+ }
+
+ // Hanifi Rohingya: U+10D30 to U+10D39.
+ if (sequence >= 0xf090b4b0 && sequence <= 0xf090b4b9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf090b4b0, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_2(sequence) == 0x91) {
+
+ // Brahmi: U+11066 to U+1106F.
+ if (sequence >= 0xf09181a6 && sequence <= 0xf09181af) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09181a6, value);
+ }
+
+ // Sora Sompeng: U+110F0 to U+110F9.
+ if (sequence >= 0xf09183b0 && sequence <= 0xf09183b9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09183b0, value);
+ }
+
+ // Chakma: U+11136 to U+1113F.
+ if (sequence >= 0xf09184b6 && sequence <= 0xf09184bf) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09184b6, value);
+ }
+
+ // Sharada: U+111D0 to U+111D9.
+ if (sequence >= 0xf0918790 && sequence <= 0xf0918799) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918790, value);
+ }
+
+ // Khudawadi: U+112F0 to U+112F9.
+ if (sequence >= 0xf0918bb0 && sequence <= 0xf0918bb9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918bb0, value);
+ }
+
+ // Newa: U+11450 to U+11459.
+ if (sequence >= 0xf0919190 && sequence <= 0xf0919199) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919190, value);
+ }
+
+ // Tirhuta: U+114D0 to U+114D9.
+ if (sequence >= 0xf0919390 && sequence <= 0xf0919399) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919390, value);
+ }
+
+ // Modi: U+11650 to U+11659.
+ if (sequence >= 0xf0919990 && sequence <= 0xf0919999) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919990, value);
+ }
+
+ // Takri: U+116C0 to U+116C9.
+ if (sequence >= 0xf0919b80 && sequence <= 0xf0919b89) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919b80, value);
+ }
+
+ // Ahom: U+11730 to U+11739.
+ if (sequence >= 0xf0919cb0 && sequence <= 0xf0919cb9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919cb0, value);
+ }
+
+ // Warang Citi: U+118E0 to U+118E9.
+ if (sequence >= 0xf091a3a0 && sequence <= 0xf091a3a9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a3a0, value);
+ }
+
+ // Dives Akuru: U+11950 to U+11959.
+ if (sequence >= 0xf091a590 && sequence <= 0xf091a599) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a590, value);
+ }
+
+ // Bhaiksuki: U+11C50 to U+11C59.
+ if (sequence >= 0xf091b190 && sequence <= 0xf091b199) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b190, value);
+ }
+
+ // Masaram Gondi: U+11D50 to U+11D59.
+ if (sequence >= 0xf091b590 && sequence <= 0xf091b599) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b590, value);
+ }
+
+ // Gunjala Gondi: U+11DA0 to U+11DA9.
+ if (sequence >= 0xf091b6a0 && sequence <= 0xf091b6a9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b6a0, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_2(sequence) == 0x96) {
+
+ // Mro: U+16A60 to U+16A69.
+ if (sequence >= 0xf096a9a0 && sequence <= 0xf096a9a9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096a9a0, value);
+ }
+
+ // Tangsa: U+16AC0 to U+16AC9.
+ if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ab80, value);
+ }
+
+ // Pahawh Hmong: U+16B50 to U+16B59.
+ if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ad90, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) {
+
+ // Mathematical Alphanumeric (Bold) Symbols: U+1D7CE to U+1D7D7.
+ if (sequence >= 0xf09d9f8e && sequence <= 0xf09d9f97) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f8e, value);
+ }
+
+ // Mathematical Alphanumeric (Double-Struck) Symbols: U+1D7D8 to U+1D7E1.
+ if (sequence >= 0xf09d9f98 && sequence <= 0xf09d9fa1) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f98, value);
+ }
+
+ // Mathematical Alphanumeric (Sans-Serif) Symbols: U+1D7E2 to U+1D7EB.
+ if (sequence >= 0xf09d9fa2 && sequence <= 0xf09d9fab) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fa2, value);
+ }
+
+ // Mathematical Alphanumeric (Sans-Serif Bold) Symbols: U+1D7EC to U+1D7F5.
+ if (sequence >= 0xf09d9fac && sequence <= 0xf09d9fb5) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fac, value);
+ }
+
+ // Mathematical Alphanumeric (Monospace) Symbols: U+1D7F6 to U+1D7FF.
+ if (sequence >= 0xf09d9fb6 && sequence <= 0xf09d9fbf) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fb6, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9e) {
+
+ // Nyiakeng Puachue Hmong: U+1E140 to U+1E149.
+ if (sequence >= 0xf09e8580 && sequence <= 0xf09e8589) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8580, value);
+ }
+
+ // Wancho: U+1E2F0 to U+1E2F9.
+ if (sequence >= 0xf09e8bb0 && sequence <= 0xf09e8bb9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8bb0, value);
+ }
+
+ // Adlam: U+1E950 to U+1E959.
+ if (sequence >= 0xf09ea590 && sequence <= 0xf09ea599) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09ea590, value);
+ }
+ }
+ else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9f) {
+
+ // Symbols for Legacy Computing (Segmented): U+1FBF0 to U+1FBF9.
+ if (sequence >= 0xf09fafb0 && sequence <= 0xf09fafb9) {
+ return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09fafb0, value);
+ }
+ }
+ }
+
+ if (value) {
+ *value = F_type_size_max_32_unsigned_d;
+ }
+
+ return F_false;
+ }
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+ f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) {
+
+   // Classify a single ASCII byte as a digit of the requested base.
+   //
+   // On entry, *value (when value is not NULL) selects the base: 0 means base-10, any other number is the base itself.
+   // On a match, *value receives the digit that was identified.
+   // A decimal digit that is outside of the base sets *value to F_type_size_max_32_unsigned_d and only matches when always is set.
+   // A byte that is not a digit of any kind returns F_false and leaves *value untouched.
+
+   // The ctype functions require an argument that is representable as an unsigned char.
+   if (isdigit((unsigned char) character)) {
+
+     // Without a value there is no base to enforce, so every ASCII decimal digit matches.
+     if (!value) return F_true;
+
+     // The ASCII decimal digits are contiguous, so the offset from '0' is the digit.
+     const uint32_t digit = (uint32_t) (character - f_string_ascii_0_s.string[0]);
+
+     // Zero is valid in every base, any other digit must be below the base (where 0 selects base-10).
+     if (!digit || !*value || *value > digit) {
+       *value = digit;
+
+       return F_true;
+     }
+
+     // This is a decimal digit, but it is outside of the requested base.
+     *value = F_type_size_max_32_unsigned_d;
+
+     return always ? F_true : F_false;
+   }
+
+   // The letters 'a' to 'f' (of either case) are digits only for a base above 10.
+   // These must be tested outside of the isdigit() check because isdigit() never matches a letter.
+   if (value) {
+     uint32_t digit = 0;
+
+     if (character >= f_string_ascii_a_s.string[0] && character <= f_string_ascii_f_s.string[0]) {
+       digit = 10 + (uint32_t) (character - f_string_ascii_a_s.string[0]);
+     }
+     else if (character >= f_string_ascii_A_s.string[0] && character <= f_string_ascii_F_s.string[0]) {
+       digit = 10 + (uint32_t) (character - f_string_ascii_A_s.string[0]);
+     }
+
+     // A base of 0 (base-10) never accepts a letter because every letter digit is at least 10.
+     if (digit && *value > digit) {
+       *value = digit;
+
+       return F_true;
+     }
+   }
+
+   return F_false;
+ }
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Provides UTF-8 capabilities.
+ *
+ * These are provided for internal reduction in redundant code.
+ * These should not be exposed/used outside of this project.
+ */
+#ifndef _PRIVATE_F_utf_decimal_h
+#define _PRIVATE_F_utf_decimal_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Private implementation of f_utf_character_is_decimal().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * This expects the character width to be of at least size 2.
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ * @param always
+ * Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range.
+ * When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if a UTF-8 decimal character.
+ * F_false if not a UTF-8 decimal character.
+ *
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see f_utf_character_is_decimal()
+ * @see f_utf_is_decimal()
+ */
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+ extern f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+/**
+ * Helper function for handling ASCII-only tests.
+ *
+ * The width is always assumed to be 1.
+ *
+ * @param character
+ * The ASCII character to validate.
+ * @param always
+ * Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range.
+ * When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if an ASCII decimal character.
+ * F_false if not an ASCII decimal character.
+ *
+ * @see isdigit()
+ *
+ * @see f_utf_character_is_decimal()
+ * @see f_utf_is_decimal()
+ */
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+ extern f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _PRIVATE_F_utf_decimal_h
extern "C" {
#endif
-#if !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
f_status_t private_f_utf_character_is_digit(const f_utf_char_t sequence) {
if (macro_f_utf_char_t_width_is(sequence) == 2) {
return F_true;
}
}
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
+
+ // Number Forms: U+2160 to U+2188 (Roman Numerals).
+ if (sequence >= 0xe285a000 && sequence <= 0xe2868800) {
+ return F_true;
+ }
+ }
else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) {
// Vai: U+A620 to U+A629.
return F_true;
}
+ // Tangsa: U+16AC0 to U+16AC9.
+ if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) {
+ return F_true;
+ }
+
// Pahawh Hmong: U+16B50 to U+16B59.
if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) {
return F_true;
return F_false;
}
-#endif // !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
#ifdef __cplusplus
} // extern "C"
#if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
f_status_t private_f_utf_character_is_word(const f_utf_char_t sequence, const bool strict) {
- if (private_f_utf_character_is_alphabetic_digit(sequence)) {
+ if (private_f_utf_character_is_alphabetic_decimal(sequence, 0)) {
return F_true;
}
#endif // _di_f_utf_substitute_
/**
- * Provide a basic UTF-8 character as a single 4-byte variable.
+ * Provide a basic UTF-8 byte sequence as a single 4-byte variable.
*
- * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
+ * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte sequence.
*
- * This "character" type is stored as a big-endian 4-byte integer (32-bits).
- * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that character arrays (uint8_t []) can be correctly processed.
+ * This byte sequence type is stored as a big-endian 4-byte integer (32-bits).
+ * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that byte sequence arrays (uint8_t []) can be correctly processed.
*
* The byte structure is intended to be read left to right in memory regardless of system endianness.
- * This is done so that the first character (the left most) can be read naturally as a string, such as string[0] = first character.
+ * This is done so that the first byte (the left most) can be read naturally as a string, such as string[0] = first byte.
*
* On little-endian systems, the hex-string 0xff is represented as internally as 0x000000ff.
* This needs to be converted into the internal representation of 0xff000000 to be properly represented as a "f_utf_char_t".
*
- * The macro_f_utf_char_t_mask_byte_* are used to get the entire character set fo a given width.
+ * The macro_f_utf_char_t_mask_byte_* are used to get the entire byte sequence for a given width.
*
- * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single character range.
+ * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single byte sequence range.
*
* The macro_f_utf_char_t_to_char_* are used to convert a f_utf_char_t into a uint8_t, for a given 8-bit block.
*
* The macro_f_utf_char_t_from_char_* are used to convert a uint8_t into part of a f_utf_char_t, for a given 8-bit block.
*
- * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width.
- * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width_is.
+ * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width.
+ * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width_is.
*
- * The macro_f_utf_char_t_width macro determines a width of the UTF-8 character based on macro_f_utf_byte_width.
- * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when character is ASCII.
+ * The macro_f_utf_char_t_width macro determines a width of the UTF-8 byte sequence based on macro_f_utf_byte_width.
+ * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when byte sequence is ASCII.
*
* The macros that end in "_be" or "_le" represent "big endian" and "little endian".
* The default macros without the "_be" should be in "big endian" because the strings are always stored as if they were "big endian" without regard to the host byte order.
#define F_utf_char_mask_char_3_be_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
#define F_utf_char_mask_char_4_be_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
- #define macro_f_utf_char_t_to_char_1_be(character) (((character) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte.
- #define macro_f_utf_char_t_to_char_2_be(character) (((character) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte.
- #define macro_f_utf_char_t_to_char_3_be(character) (((character) & F_utf_char_mask_char_3_be_d) >> 8) // Grab third byte.
- #define macro_f_utf_char_t_to_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Grab fourth byte.
+ #define macro_f_utf_char_t_to_char_1_be(sequence) (((sequence) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte.
+ #define macro_f_utf_char_t_to_char_2_be(sequence) (((sequence) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte.
+ #define macro_f_utf_char_t_to_char_3_be(sequence) (((sequence) & F_utf_char_mask_char_3_be_d) >> 8) // Grab third byte.
+ #define macro_f_utf_char_t_to_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d) // Grab fourth byte.
- #define macro_f_utf_char_t_from_char_1_be(character) (((character) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte.
- #define macro_f_utf_char_t_from_char_2_be(character) (((character) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte.
- #define macro_f_utf_char_t_from_char_3_be(character) (((character) << 8) & F_utf_char_mask_char_3_be_d) // Shift to third byte.
- #define macro_f_utf_char_t_from_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d) // Shift to fourth byte.
+ #define macro_f_utf_char_t_from_char_1_be(sequence) (((sequence) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte.
+ #define macro_f_utf_char_t_from_char_2_be(sequence) (((sequence) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte.
+ #define macro_f_utf_char_t_from_char_3_be(sequence) (((sequence) << 8) & F_utf_char_mask_char_3_be_d) // Shift to third byte.
+ #define macro_f_utf_char_t_from_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d) // Shift to fourth byte.
// Little Endian.
#define F_utf_char_mask_byte_1_le_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
#define F_utf_char_mask_char_3_le_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000
#define F_utf_char_mask_char_4_le_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
- #define macro_f_utf_char_t_to_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Grab first byte.
- #define macro_f_utf_char_t_to_char_2_le(character) (((character) & F_utf_char_mask_char_2_le_d) >> 8) // Grab second byte.
- #define macro_f_utf_char_t_to_char_3_le(character) (((character) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte.
- #define macro_f_utf_char_t_to_char_4_le(character) (((character) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte.
+ #define macro_f_utf_char_t_to_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d) // Grab first byte.
+ #define macro_f_utf_char_t_to_char_2_le(sequence) (((sequence) & F_utf_char_mask_char_2_le_d) >> 8) // Grab second byte.
+ #define macro_f_utf_char_t_to_char_3_le(sequence) (((sequence) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte.
+ #define macro_f_utf_char_t_to_char_4_le(sequence) (((sequence) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte.
- #define macro_f_utf_char_t_from_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d) // Shift to first byte.
- #define macro_f_utf_char_t_from_char_2_le(character) (((character) << 8) & F_utf_char_mask_char_2_le_d) // Shift to second byte.
- #define macro_f_utf_char_t_from_char_3_le(character) (((character) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte.
- #define macro_f_utf_char_t_from_char_4_le(character) (((character) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte.
+ #define macro_f_utf_char_t_from_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d) // Shift to first byte.
+ #define macro_f_utf_char_t_from_char_2_le(sequence) (((sequence) << 8) & F_utf_char_mask_char_2_le_d) // Shift to second byte.
+ #define macro_f_utf_char_t_from_char_3_le(sequence) (((sequence) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte.
+ #define macro_f_utf_char_t_from_char_4_le(sequence) (((sequence) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte.
#define F_utf_char_mask_byte_1_d F_utf_char_mask_byte_1_be_d
#define F_utf_char_mask_byte_2_d F_utf_char_mask_byte_2_be_d
#define F_utf_char_mask_char_3_d F_utf_char_mask_char_3_be_d
#define F_utf_char_mask_char_4_d F_utf_char_mask_char_4_be_d
- #define macro_f_utf_char_t_to_char_1(character) macro_f_utf_char_t_to_char_1_be(character)
- #define macro_f_utf_char_t_to_char_2(character) macro_f_utf_char_t_to_char_2_be(character)
- #define macro_f_utf_char_t_to_char_3(character) macro_f_utf_char_t_to_char_3_be(character)
- #define macro_f_utf_char_t_to_char_4(character) macro_f_utf_char_t_to_char_4_be(character)
+ #define macro_f_utf_char_t_to_char_1(sequence) macro_f_utf_char_t_to_char_1_be(sequence)
+ #define macro_f_utf_char_t_to_char_2(sequence) macro_f_utf_char_t_to_char_2_be(sequence)
+ #define macro_f_utf_char_t_to_char_3(sequence) macro_f_utf_char_t_to_char_3_be(sequence)
+ #define macro_f_utf_char_t_to_char_4(sequence) macro_f_utf_char_t_to_char_4_be(sequence)
- #define macro_f_utf_char_t_from_char_1(character) macro_f_utf_char_t_from_char_1_be(character)
- #define macro_f_utf_char_t_from_char_2(character) macro_f_utf_char_t_from_char_2_be(character)
- #define macro_f_utf_char_t_from_char_3(character) macro_f_utf_char_t_from_char_3_be(character)
- #define macro_f_utf_char_t_from_char_4(character) macro_f_utf_char_t_from_char_4_be(character)
+ #define macro_f_utf_char_t_from_char_1(sequence) macro_f_utf_char_t_from_char_1_be(sequence)
+ #define macro_f_utf_char_t_from_char_2(sequence) macro_f_utf_char_t_from_char_2_be(sequence)
+ #define macro_f_utf_char_t_from_char_3(sequence) macro_f_utf_char_t_from_char_3_be(sequence)
+ #define macro_f_utf_char_t_from_char_4(sequence) macro_f_utf_char_t_from_char_4_be(sequence)
- #define macro_f_utf_char_t_width(character) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(character)))
- #define macro_f_utf_char_t_width_is(character) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(character)))
+ #define macro_f_utf_char_t_width(sequence) (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(sequence)))
+ #define macro_f_utf_char_t_width_is(sequence) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(sequence)))
#endif // _di_f_utf_char_t_
/**
#endif // _di_f_utf_string_t_
/**
- * Define unicode special character widths.
+ * Define unicode special byte sequence widths.
*
* F_utf_width_*:
* - none: Designate this is not a width value or has no width (aka: NULL).
* - ambiguous: Characters appear in East Asian DBCS and in SBCS.
- * - full: Wide character that has a equivilent to a narrow character.
- * - half: Narrow character that has a equivilent to a wide character.
- * - narrow: Narrow character, without a wide equivalent.
+ * - full: Wide byte sequence that has an equivalent to a narrow byte sequence.
+ * - half: Narrow byte sequence that has an equivalent to a wide byte sequence.
+ * - narrow: Narrow byte sequence, without a wide equivalent.
* - nuetral: Characters that do not appear in East Asian DBCS codes.
- * - wide: Wide character, without a narrow equivalent.
+ * - wide: Wide byte sequence, without a narrow equivalent.
*/
#ifndef _di_f_utf_widths_t_
enum {
#include "../private-utf_alphabetic.h"
#include "../private-utf_combining.h"
#include "../private-utf_control.h"
+#include "../private-utf_decimal.h"
#include "../private-utf_digit.h"
#include "../private-utf_emoji.h"
#include "../private-utf_numeric.h"
#endif
#ifndef _di_f_utf_is_
- f_status_t f_utf_is(const f_string_t character) {
+ f_status_t f_utf_is(const f_string_t sequence) {
- return macro_f_utf_byte_width_is(*character);
+ return macro_f_utf_byte_width_is(*sequence); // Only the first byte is inspected; the macro result is returned as-is (presumably 0 for ASCII, confirm against the macro).
}
#endif // _di_f_utf_is_
#ifndef _di_f_utf_is_alphabetic_
- f_status_t f_utf_is_alphabetic(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_alphabetic(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) { // A non-zero width selects the UTF-8 path, otherwise the ASCII test at the end is used.
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) { // The sequence needs more bytes than width_max allows.
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) { // A width of 1 on this path is reported as a fragment.
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0; // Receives the converted form of the byte sequence.
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); // Presumably packs the bytes into a single f_utf_char_t (helper is not shown here).
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_alphabetic(character_utf);
+ return private_f_utf_character_is_alphabetic(utf); // The multi-byte decision is delegated to the private implementation.
}
- if (isalpha(*character)) {
- return F_true;
- }
+ if (isalpha(*sequence)) return F_true; // ASCII path: relies on the C library classification.
return F_false;
}
#endif // _di_f_utf_is_alphabetic_
-#ifndef _di_f_utf_is_alphabetic_digit_
- f_status_t f_utf_is_alphabetic_digit(const f_string_t character, const f_array_length_t width_max) {
+#ifndef _di_f_utf_is_alphabetic_decimal_
+ f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) { // A non-zero width selects the UTF-8 path, otherwise the ASCII tests at the end are used.
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) { // The sequence needs more bytes than width_max allows.
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) { // A width of 1 on this path is reported as a fragment.
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0; // Receives the converted form of the byte sequence.
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); // Presumably packs the bytes into a single f_utf_char_t (helper is not shown here).
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_alphabetic_digit(character_utf);
+ return private_f_utf_character_is_alphabetic_decimal(utf, value); // The multi-byte decision is delegated, with value passed through unchanged.
}
- if (isalnum(*character)) {
+ if (isalpha(*sequence)) return F_true; // ASCII letters match here without value being written.
+
+ if (private_f_utf_character_is_decimal_for_ascii(*sequence, F_true, value) == F_true) { // With always set to F_true, an ASCII decimal digit matches even when it is outside of the base requested through value.
return F_true;
}
return F_false;
}
-#endif // _di_f_utf_is_alphabetic_digit_
+#endif // _di_f_utf_is_alphabetic_decimal_
#ifndef _di_f_utf_is_alphabetic_numeric_
- f_status_t f_utf_is_alphabetic_numeric(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_alphabetic_numeric(character_utf);
+ return private_f_utf_character_is_alphabetic_numeric(utf);
}
- if (isalnum(*character)) {
- return F_true;
- }
+ if (isalnum(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_alphabetic_numeric_
#ifndef _di_f_utf_is_ascii_
- f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_ascii(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
#endif // _di_f_utf_is_ascii_
#ifndef _di_f_utf_is_combining_
- f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_combining(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_combining(character_utf);
+ return private_f_utf_character_is_combining(utf);
}
// There are no ASCII combining characters.
#endif // _di_f_utf_is_combining_
#ifndef _di_f_utf_is_control_
- f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_control(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_control(character_utf);
+ return private_f_utf_character_is_control(utf);
}
- if (iscntrl(*character)) {
- return F_true;
- }
+ if (iscntrl(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_control_
-#ifndef _di_f_utf_is_control_code
+#ifndef _di_f_utf_is_control_code_
- f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_control_code(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_control_code(character_utf);
+ return private_f_utf_character_is_control_code(utf);
}
- if (iscntrl(*character)) {
- return F_true;
- }
+ if (iscntrl(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_control_code_
#ifndef _di_f_utf_is_control_format_
- f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_control_format(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_control_format(character_utf);
+ return private_f_utf_character_is_control_format(utf);
}
// There are no ASCII control formats.
#endif // _di_f_utf_is_control_format_
#ifndef _di_f_utf_is_control_picture_
- f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_control_picture(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- if (macro_f_utf_byte_width_is(*character) != 3) {
+ if (macro_f_utf_byte_width_is(*sequence) != 3) {
return F_false;
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_control_picture(character_utf);
+ return private_f_utf_character_is_control_picture(utf);
}
// There are no ASCII control pictures.
}
#endif // _di_f_utf_is_control_picture_
-#ifndef _di_f_utf_is_digit_
- f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
+#ifndef _di_f_utf_is_decimal_
+ f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_digit(character_utf);
+ return private_f_utf_character_is_decimal(utf, F_true, value);
}
- if (isdigit(*character)) {
- return F_true;
+ return private_f_utf_character_is_decimal_for_ascii(*sequence, F_false, value);
+ }
+#endif // _di_f_utf_is_decimal_
+
+#ifndef _di_f_utf_is_digit_
+ f_status_t f_utf_is_digit(const f_string_t sequence, const f_array_length_t width_max) {
+ #ifndef _di_level_0_parameter_checking_
+ if (width_max < 1) return F_status_set_error(F_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
+ return F_status_set_error(F_complete_not_utf);
+ }
+
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+
+ f_utf_char_t utf = 0;
+
+ {
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
+ if (F_status_is_error(status)) return status;
+ }
+
+ return private_f_utf_character_is_digit(utf);
}
+ if (isdigit(*sequence)) return F_true;
+
return F_false;
}
#endif // _di_f_utf_is_digit_
#ifndef _di_f_utf_is_emoji_
- f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_emoji(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_emoji(character_utf);
+ return private_f_utf_character_is_emoji(utf);
}
return F_false;
#endif // _di_f_utf_is_emoji_
#ifndef _di_f_utf_is_fragment_
- f_status_t f_utf_is_fragment(const f_string_t character) {
+ f_status_t f_utf_is_fragment(const f_string_t sequence) {
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_true;
}
#endif // _di_f_utf_is_fragment_
#ifndef _di_f_utf_is_graph_
- f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_graph(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- if (private_f_utf_character_is_control(character_utf)) {
+ if (private_f_utf_character_is_control(utf)) {
return F_false;
}
- if (private_f_utf_character_is_whitespace(character_utf)) {
+ if (private_f_utf_character_is_whitespace(utf)) {
return F_false;
}
- // Zero-width characters are be treated as a non-graph.
+ // Zero-width characters are treated as non-graph.
- if (private_f_utf_character_is_zero_width(character_utf)) {
+ if (private_f_utf_character_is_zero_width(utf)) {
return F_false;
}
return F_true;
}
- if (isgraph(*character)) {
- return F_true;
- }
+ if (isgraph(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_graph_
#ifndef _di_f_utf_is_numeric_
- f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_numeric(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_numeric(character_utf);
+ return private_f_utf_character_is_numeric(utf);
}
- if (isdigit(*character)) {
- return F_true;
- }
+ if (isdigit(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_numeric_
#ifndef _di_f_utf_is_phonetic_
- f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_phonetic(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_phonetic(character_utf);
+ return private_f_utf_character_is_phonetic(utf);
}
// There are no ASCII phonetic characters.
#endif // _di_f_utf_is_phonetic_
#ifndef _di_f_utf_is_private_
- f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_private(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_private(character_utf);
+ return private_f_utf_character_is_private(utf);
}
// There are no ASCII private characters.
#endif // _di_f_utf_is_private_
#ifndef _di_f_utf_is_punctuation_
- f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_punctuation(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_punctuation(character_utf);
+ return private_f_utf_character_is_punctuation(utf);
}
// ASCII: U+0021 '!' to U+0023 '#'.
- if (character[0] > 0x20 && character[0] < 0x24) {
+ if (sequence[0] > 0x20 && sequence[0] < 0x24) {
return F_true;
}
// ASCII: U+0025 '%' to U+002A '*'.
- if (character[0] > 0x24 && character[0] < 0x2b) {
+ if (sequence[0] > 0x24 && sequence[0] < 0x2b) {
return F_true;
}
// ASCII: U+002C ',' to U+002F '/'.
- if (character[0] > 0x2b && character[0] < 0x30) {
+ if (sequence[0] > 0x2b && sequence[0] < 0x30) {
return F_true;
}
// ASCII: U+003A ':', U+003B ';', U+003F '?', or U+0040 '@'.
- if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
+ if (sequence[0] == 0x3a || sequence[0] == 0x3b || sequence[0] == 0x3f || sequence[0] == 0x40) {
return F_true;
}
// ASCII: U+005B '[' to U+005D ']'.
- if (character[0] > 0x5a && character[0] < 0x5e) {
+ if (sequence[0] > 0x5a && sequence[0] < 0x5e) {
return F_true;
}
// ASCII: U+005F '_', U+007B '{', or U+007D '}'.
- if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
+ if (sequence[0] == 0x5f || sequence[0] == 0x7b || sequence[0] == 0x7d) {
return F_true;
}
#endif // _di_f_utf_is_punctuation_
#ifndef _di_f_utf_is_subscript_
- f_status_t f_utf_is_subscript(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_subscript(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_subscript(character_utf);
+ return private_f_utf_character_is_subscript(utf);
}
return F_false;
#endif // _di_f_utf_is_subscript_
#ifndef _di_f_utf_is_superscript_
- f_status_t f_utf_is_superscript(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_superscript(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_superscript(character_utf);
+ return private_f_utf_character_is_superscript(utf);
}
return F_false;
#endif // _di_f_utf_is_superscript_
#ifndef _di_f_utf_is_symbol_
- f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_symbol(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_symbol(character_utf);
+ return private_f_utf_character_is_symbol(utf);
}
// ASCII: U+0024 ('$') or U+002B ('+').
- if (character[0] == 0x24 || character[0] == 0x2b) {
+ if (sequence[0] == 0x24 || sequence[0] == 0x2b) {
return F_true;
}
// ASCII: U+003C ('<') to U+003E ('>').
- if (character[0] >= 0x3c && character[0] <= 0x3e) {
+ if (sequence[0] >= 0x3c && sequence[0] <= 0x3e) {
return F_true;
}
// ASCII: U+005E ('^'), U+0060 ('`'), U+007C ('|'), or U+007E ('~').
- if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
+ if (sequence[0] == 0x5e || sequence[0] == 0x60 || sequence[0] == 0x7c || sequence[0] == 0x7e) {
return F_true;
}
#endif // _di_f_utf_is_symbol_
#ifndef _di_f_utf_is_unassigned_
- f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_unassigned(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_unassigned(character_utf);
+ return private_f_utf_character_is_unassigned(utf);
}
// ASCII are never unassigned.
#endif // _di_f_utf_is_unassigned_
#ifndef _di_f_utf_is_valid_
- f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_valid(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_valid(character_utf);
+ return private_f_utf_character_is_valid(utf);
}
// ASCII are valid.
#endif // _di_f_utf_is_valid_
#ifndef _di_f_utf_is_whitespace_
- f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_whitespace(character_utf);
+ return private_f_utf_character_is_whitespace(utf);
}
- if (isspace(*character)) {
- return F_true;
- }
+ if (isspace(*sequence)) return F_true;
return F_false;
}
#endif // _di_f_utf_is_whitespace_
#ifndef _di_f_utf_is_whitespace_modifier_
- f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_whitespace_modifier(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_whitespace_modifier(character_utf);
+ return private_f_utf_character_is_whitespace_modifier(utf);
}
// There are no ASCII whitespace modifiers.
#endif // _di_f_utf_is_whitespace_modifier_
#ifndef _di_f_utf_is_whitespace_other_
- f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_whitespace_other(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_whitespace_other(character_utf);
+ return private_f_utf_character_is_whitespace_other(utf);
}
// There are no ASCII whitespace other.
#endif // _di_f_utf_is_whitespace_other_
#ifndef _di_f_utf_is_wide_
- f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_wide(const f_string_t sequence, const f_array_length_t width_max) {
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_wide(character_utf);
+ return private_f_utf_character_is_wide(utf);
}
// There are no wide ASCII characters.
#endif // _di_f_utf_is_wide_
#ifndef _di_f_utf_is_word_
- f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+ f_status_t f_utf_is_word(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_word(character_utf, strict);
+ return private_f_utf_character_is_word(utf, strict);
}
- if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0]) {
+ if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0]) {
return F_true;
}
#endif // _di_f_utf_is_word_
#ifndef _di_f_utf_is_word_dash_
- f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+ f_status_t f_utf_is_word_dash(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_word_dash(character_utf, strict);
+ return private_f_utf_character_is_word_dash(utf, strict);
}
- if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0]) {
+ if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0]) {
return F_true;
}
#endif // _di_f_utf_is_word_dash_
#ifndef _di_f_utf_is_word_dash_plus_
- f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+ f_status_t f_utf_is_word_dash_plus(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_word_dash_plus(character_utf, strict);
+ return private_f_utf_character_is_word_dash_plus(utf, strict);
}
- if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0] || *character == f_string_ascii_plus_s.string[0]) {
+ if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0] || *sequence == f_string_ascii_plus_s.string[0]) {
return F_true;
}
#endif // _di_f_utf_is_word_dash_plus_
#ifndef _di_f_utf_is_zero_width_
- f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
+ f_status_t f_utf_is_zero_width(const f_string_t sequence, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
if (width_max < 1) return F_status_set_error(F_parameter);
#endif // _di_level_0_parameter_checking_
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
+ if (macro_f_utf_byte_width_is(*sequence)) {
+ if (macro_f_utf_byte_width_is(*sequence) > width_max) {
return F_status_set_error(F_complete_not_utf);
}
- if (macro_f_utf_byte_width_is(*character) == 1) {
+ if (macro_f_utf_byte_width_is(*sequence) == 1) {
return F_status_set_error(F_utf_fragment);
}
- f_utf_char_t character_utf = 0;
+ f_utf_char_t utf = 0;
{
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
- return private_f_utf_character_is_zero_width(character_utf);
+ return private_f_utf_character_is_zero_width(utf);
}
// These control characters are considered zero-width spaces.
- if (*character >= 0x00 && *character <= 0x08) {
+ if (*sequence >= 0x00 && *sequence <= 0x08) {
return F_true;
}
- else if (*character >= 0x0c && *character <= 0x1f) {
+ else if (*sequence >= 0x0c && *sequence <= 0x1f) {
return F_true;
}
- else if (*character == 0x7f) {
+ else if (*sequence == 0x7f) {
return F_true;
}
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
*
- * Digit characters are decimal digits and letter numbers.
+ * Decimal characters are decimal digits.
*
* This does not include number-like, such as 1/2 (½) or superscript 2 (²).
*
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as Roman numerals.
+ * Roman numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit.
+ * The 'F' is a character and a base-16 digit.
+ * If this were to return F_false because it is greater than the requested base-12 then there would be confusion on whether or not 'F' is alphabetic.
+ * If the determined digit is greater than the requested base, then 0xffff is assigned to value.
+ *
* @param sequence
* The byte sequence to validate as a character.
* There must be enough space allocated to compare against, as limited by width_max.
* @param width_max
* The maximum width available for checking.
* Can be anything greater than 0.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffff to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ * Set to NULL to not use.
*
* @return
* F_true if a UTF-8 alphabet character.
*
* @see isalnum()
*/
+#ifndef _di_f_utf_is_alphabetic_decimal_
+ extern f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value);
+#endif // _di_f_utf_is_alphabetic_decimal_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ *
+ * @return
+ * F_true if a UTF-8 alphabetic-digit character.
+ * F_false if not a UTF-8 alphabetic-digit character.
+ *
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see isalnum()
+ */
#ifndef _di_f_utf_is_alphabetic_digit_
extern f_status_t f_utf_is_alphabetic_digit(const f_string_t sequence, const f_array_length_t width_max);
#endif // _di_f_utf_is_alphabetic_digit_
/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
*
* Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
*
- * @see isalnum()
+ * @see isalpha()
+ * @see isdigit()
*/
#ifndef _di_f_utf_is_alphabetic_numeric_
extern f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max);
#endif // _di_f_utf_is_control_picture_
/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character.
+ *
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as Roman numerals.
+ * Roman numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function only returns F_true for valid decimal digits within the requested base.
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ * @param base
+ * (optional) The base digit to specify (up to base 16).
+ * Set to 0 to not use.
+ * This is ignored when value is NULL.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffff to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if a UTF-8 decimal character.
+ * F_false if not a UTF-8 decimal character.
+ *
+ * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if Unicode is an invalid Unicode character.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_decimal_
+ extern f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value);
+#endif // _di_f_utf_is_decimal_
+
+/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
*
+ * Digit characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
* @param sequence
* The byte sequence to validate as a character.
* There must be enough space allocated to compare against, as limited by width_max.
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
*
- * A word character is alpha-digit or an underscore '_'.
+ * A word character is alphabetic-decimal or an underscore '_'.
*
* @param sequence
* The byte sequence to validate as a character.
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
*
- * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
+ * A word dash character is alphabetic-decimal, an underscore '_' or a dash '-'.
*
* Unicode appears to refer to dashes that connect words as a hyphen.
* Therefore, only these hyphens are considered dashes for the purposes of this function.
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
*
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'.
*
* Unicode appears to refer to dashes that connect words as a hyphen.
* Therefore, only these hyphens are considered dashes for the purposes of this function.
#include "../private-utf_alphabetic.h"
#include "../private-utf_combining.h"
#include "../private-utf_control.h"
+#include "../private-utf_decimal.h"
#include "../private-utf_digit.h"
#include "../private-utf_emoji.h"
#include "../private-utf_numeric.h"
}
#endif // _di_f_utf_character_is_alphabetic_
+#ifndef _di_f_utf_character_is_alphabetic_decimal_
+ f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+ // A non-zero width takes the multi-byte branch (presumably a UTF-8 encoded character); a width of exactly 1 is rejected as a fragment.
+ if (macro_f_utf_char_t_width_is(sequence)) {
+ if (macro_f_utf_char_t_width_is(sequence) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+ // Remaining widths are delegated to the private implementation, which also receives value.
+ return private_f_utf_character_is_alphabetic_decimal(sequence, value);
+ }
+ // Single-byte path: an isalpha() match returns F_true immediately, and value is not written on this path.
+ if (isalpha(macro_f_utf_char_t_to_char_1(sequence))) return F_true;
+ // Otherwise ask the ASCII decimal helper; the meaning of its F_true argument is not visible here.
+ if (private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_true, value) == F_true) {
+ return F_true;
+ }
+ // Neither isalpha() nor the ASCII decimal helper accepted the byte.
+ return F_false;
+ }
+#endif // _di_f_utf_character_is_alphabetic_decimal_
+
#ifndef _di_f_utf_character_is_alphabetic_digit_
- f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence) {
+ f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) {
if (macro_f_utf_char_t_width_is(sequence)) {
if (macro_f_utf_char_t_width_is(sequence) == 1) {
}
#endif // _di_f_utf_character_is_control_picture_
+#ifndef _di_f_utf_character_is_decimal_
+ f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+ // A non-zero width takes the multi-byte branch (presumably a UTF-8 encoded character); a width of exactly 1 is rejected as a fragment.
+ if (macro_f_utf_char_t_width_is(sequence)) {
+ if (macro_f_utf_char_t_width_is(sequence) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+ // Remaining widths are delegated to the private implementation; the meaning of its F_false argument is not visible here.
+ return private_f_utf_character_is_decimal(sequence, F_false, value);
+ }
+ // Single-byte path: the result of the ASCII decimal helper is returned unchanged.
+ return private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_false, value);
+ }
+#endif // _di_f_utf_character_is_decimal_
+
#ifndef _di_f_utf_character_is_digit_
f_status_t f_utf_character_is_digit(const f_utf_char_t sequence) {
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
*
- * Digit characters are decimal digits and letter numbers.
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as Roman numerals.
+ * Roman numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit.
+ * The 'F' is a character and a base-16 digit.
+ * If this were to return F_false because it is greater than the requested base-12 then there would be confusion on whether or not 'F' is alphabetic.
+ * If the determined digit is greater than the requested base, then 0xffff is assigned to value.
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffff to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if a UTF-8 alphabetic-decimal character.
+ * F_false if not a UTF-8 alphabetic-decimal character.
+ *
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see isalpha()
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_alphabetic_decimal_
+ extern f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value);
+#endif // _di_f_utf_character_is_alphabetic_decimal_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits.
*
* This does not include number-like, such as 1/2 (½) or superscript 2 (²).
*
* The byte sequence to validate as a character.
*
* @return
- * F_true if a UTF-8 alpha-digit character.
- * F_false if not a UTF-8 alpha-digit character.
+ * F_true if a UTF-8 alphabetic-digit character.
+ * F_false if not a UTF-8 alphabetic-digit character.
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
* @see isalnum()
*/
#ifndef _di_f_utf_character_is_alphabetic_digit_
- extern f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence);
+ extern f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence);
#endif // _di_f_utf_character_is_alphabetic_digit_
/**
#endif // _di_f_utf_character_is_control_picture_
/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character.
+ *
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as Roman numerals.
+ * Roman numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function only returns F_true for valid decimal digits within the requested base.
+ *
+ * @param sequence
+ * The byte sequence to validate as a character.
+ * @param value
+ * (optional) The integer representation of the character if the character is a decimal.
+ * If specified, value is set to 0xffff to represent no known representation.
+ * If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ * If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ * For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ * If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ * Set to NULL to not use.
+ *
+ * @return
+ * F_true if a UTF-8 decimal character.
+ * F_false if not a UTF-8 decimal character.
+ *
+ * F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ * F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_decimal_
+ extern f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value);
+#endif // _di_f_utf_character_is_decimal_
+
+/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
*
- * Digit characters are decimal digits and letter numbers.
+ * Digit characters are decimal digits.
*
* This does not include number-like, such as 1/2 (½) or superscript 2 (²).
*
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
*
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'.
*
* Unicode appears to refer to dashes that connect words as a hyphen.
* Therefore, only these hyphens are considered dashes for the purposes of this function.
build_libraries -lc
build_libraries-individual -lf_memory -lf_string
-build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
+build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/map_multi.c utf/static.c utf/string.c utf/triple.c
build_sources_library utf/private-is_unassigned.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-string.c utf/private-triple.c
build_sources_program test-utf-character_is_alphabetic.c test-utf-is_alphabetic.c
build_sources_program test-utf-character_is_combining.c test-utf-is_combining.c
build_sources_program test-utf-character_is_control.c test-utf-is_control.c
+build_sources_program test-utf-character_is_decimal.c test-utf-is_decimal.c
build_sources_program test-utf-character_is_digit.c test-utf-is_digit.c
build_sources_program test-utf-character_is_emoji.c test-utf-is_emoji.c
build_sources_program test-utf-character_is_numeric.c test-utf-is_numeric.c
--- /dev/null
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+55712
+55713
+55714
+55715
+55716
+55717
+55718
+55719
+55720
+55721
+56240
+56241
+56242
+56243
+56244
+56245
+56246
+56247
+56248
+56249
+57216
+57217
+57218
+57219
+57220
+57221
+57222
+57223
+57224
+57225
+14722470
+14722471
+14722472
+14722473
+14722474
+14722475
+14722476
+14722477
+14722478
+14722479
+14722982
+14722983
+14722984
+14722985
+14722986
+14722987
+14722988
+14722989
+14722990
+14722991
+14723494
+14723495
+14723496
+14723497
+14723498
+14723499
+14723500
+14723501
+14723502
+14723503
+14724006
+14724007
+14724008
+14724009
+14724010
+14724011
+14724012
+14724013
+14724014
+14724015
+14724518
+14724519
+14724520
+14724521
+14724522
+14724523
+14724524
+14724525
+14724526
+14724527
+14725030
+14725031
+14725032
+14725033
+14725034
+14725035
+14725036
+14725037
+14725038
+14725039
+14725542
+14725543
+14725544
+14725545
+14725546
+14725547
+14725548
+14725549
+14725550
+14725551
+14726054
+14726055
+14726056
+14726057
+14726058
+14726059
+14726060
+14726061
+14726062
+14726063
+14726566
+14726567
+14726568
+14726569
+14726570
+14726571
+14726572
+14726573
+14726574
+14726575
+14727078
+14727079
+14727080
+14727081
+14727082
+14727083
+14727084
+14727085
+14727086
+14727087
+14727568
+14727569
+14727570
+14727571
+14727572
+14727573
+14727574
+14727575
+14727576
+14727577
+14728080
+14728081
+14728082
+14728083
+14728084
+14728085
+14728086
+14728087
+14728088
+14728089
+14728352
+14728353
+14728354
+14728355
+14728356
+14728357
+14728358
+14728359
+14728360
+14728361
+14778752
+14778753
+14778754
+14778755
+14778756
+14778757
+14778758
+14778759
+14778760
+14778761
+14779024
+14779025
+14779026
+14779027
+14779028
+14779029
+14779030
+14779031
+14779032
+14779033
+14786464
+14786465
+14786466
+14786467
+14786468
+14786469
+14786470
+14786471
+14786472
+14786473
+14786704
+14786705
+14786706
+14786707
+14786708
+14786709
+14786710
+14786711
+14786712
+14786713
+14787974
+14787975
+14787976
+14787977
+14787978
+14787979
+14787980
+14787981
+14787982
+14787983
+14788496
+14788497
+14788498
+14788499
+14788500
+14788501
+14788502
+14788503
+14788504
+14788505
+14789248
+14789249
+14789250
+14789251
+14789252
+14789253
+14789254
+14789255
+14789256
+14789257
+14789264
+14789265
+14789266
+14789267
+14789268
+14789269
+14789270
+14789271
+14789272
+14789273
+14790032
+14790033
+14790034
+14790035
+14790036
+14790037
+14790038
+14790039
+14790040
+14790041
+14790320
+14790321
+14790322
+14790323
+14790324
+14790325
+14790326
+14790327
+14790328
+14790329
+14791040
+14791041
+14791042
+14791043
+14791044
+14791045
+14791046
+14791047
+14791048
+14791049
+14791056
+14791057
+14791058
+14791059
+14791060
+14791061
+14791062
+14791063
+14791064
+14791065
+14845344
+14845345
+14845346
+14845347
+14845348
+14845349
+14845350
+14845351
+14845352
+14845353
+14845354
+14845355
+14845356
+14845357
+14845358
+14845359
+14845360
+14845361
+14845362
+14845363
+14845364
+14845365
+14845366
+14845367
+14845368
+14845369
+14845370
+14845371
+14845372
+14845373
+14845374
+14845375
+14845568
+14845569
+14845570
+14845571
+14845572
+14845573
+14845574
+14845575
+14845576
+15374496
+15374497
+15374498
+15374499
+15374500
+15374501
+15374502
+15374503
+15374504
+15374505
+15377296
+15377297
+15377298
+15377299
+15377300
+15377301
+15377302
+15377303
+15377304
+15377305
+15377536
+15377537
+15377538
+15377539
+15377540
+15377541
+15377542
+15377543
+15377544
+15377545
+15378320
+15378321
+15378322
+15378323
+15378324
+15378325
+15378326
+15378327
+15378328
+15378329
+15378352
+15378353
+15378354
+15378355
+15378356
+15378357
+15378358
+15378359
+15378360
+15378361
+15378832
+15378833
+15378834
+15378835
+15378836
+15378837
+15378838
+15378839
+15378840
+15378841
+15380400
+15380401
+15380402
+15380403
+15380404
+15380405
+15380406
+15380407
+15380408
+15380409
+15711376
+15711377
+15711378
+15711379
+15711380
+15711381
+15711382
+15711383
+15711384
+15711385
+4036006560
+4036006561
+4036006562
+4036006563
+4036006564
+4036006565
+4036006566
+4036006567
+4036006568
+4036006569
+4036015280
+4036015281
+4036015282
+4036015283
+4036015284
+4036015285
+4036015286
+4036015287
+4036015288
+4036015289
+4036067750
+4036067751
+4036067752
+4036067753
+4036067754
+4036067755
+4036067756
+4036067757
+4036067758
+4036067759
+4036068272
+4036068273
+4036068274
+4036068275
+4036068276
+4036068277
+4036068278
+4036068279
+4036068280
+4036068281
+4036068534
+4036068535
+4036068536
+4036068537
+4036068538
+4036068539
+4036068540
+4036068541
+4036068542
+4036068543
+4036069264
+4036069265
+4036069266
+4036069267
+4036069268
+4036069269
+4036069270
+4036069271
+4036069272
+4036069273
+4036070320
+4036070321
+4036070322
+4036070323
+4036070324
+4036070325
+4036070326
+4036070327
+4036070328
+4036070329
+4036071824
+4036071825
+4036071826
+4036071827
+4036071828
+4036071829
+4036071830
+4036071831
+4036071832
+4036071833
+4036072336
+4036072337
+4036072338
+4036072339
+4036072340
+4036072341
+4036072342
+4036072343
+4036072344
+4036072345
+4036073872
+4036073873
+4036073874
+4036073875
+4036073876
+4036073877
+4036073878
+4036073879
+4036073880
+4036073881
+4036074368
+4036074369
+4036074370
+4036074371
+4036074372
+4036074373
+4036074374
+4036074375
+4036074376
+4036074377
+4036074672
+4036074673
+4036074674
+4036074675
+4036074676
+4036074677
+4036074678
+4036074679
+4036074680
+4036074681
+4036076448
+4036076449
+4036076450
+4036076451
+4036076452
+4036076453
+4036076454
+4036076455
+4036076456
+4036076457
+4036076944
+4036076945
+4036076946
+4036076947
+4036076948
+4036076949
+4036076950
+4036076951
+4036076952
+4036076953
+4036080016
+4036080017
+4036080018
+4036080019
+4036080020
+4036080021
+4036080022
+4036080023
+4036080024
+4036080025
+4036081040
+4036081041
+4036081042
+4036081043
+4036081044
+4036081045
+4036081046
+4036081047
+4036081048
+4036081049
+4036081312
+4036081313
+4036081314
+4036081315
+4036081316
+4036081317
+4036081318
+4036081319
+4036081320
+4036081321
+4036405664
+4036405665
+4036405666
+4036405667
+4036405668
+4036405669
+4036405670
+4036405671
+4036405672
+4036405673
+4036406144
+4036406145
+4036406146
+4036406147
+4036406148
+4036406149
+4036406150
+4036406151
+4036406152
+4036406153
+4036406672
+4036406673
+4036406674
+4036406675
+4036406676
+4036406677
+4036406678
+4036406679
+4036406680
+4036406681
+4036861838
+4036861839
+4036861840
+4036861841
+4036861842
+4036861843
+4036861844
+4036861845
+4036861846
+4036861847
+4036861848
+4036861849
+4036861850
+4036861851
+4036861852
+4036861853
+4036861854
+4036861855
+4036861856
+4036861857
+4036861858
+4036861859
+4036861860
+4036861861
+4036861862
+4036861863
+4036861864
+4036861865
+4036861866
+4036861867
+4036861868
+4036861869
+4036861870
+4036861871
+4036861872
+4036861873
+4036861874
+4036861875
+4036861876
+4036861877
+4036861878
+4036861879
+4036861880
+4036861881
+4036861882
+4036861883
+4036861884
+4036861885
+4036861886
+4036861887
+4036920704
+4036920705
+4036920706
+4036920707
+4036920708
+4036920709
+4036920710
+4036920711
+4036920712
+4036920713
+4036922288
+4036922289
+4036922290
+4036922291
+4036922292
+4036922293
+4036922294
+4036922295
+4036922296
+4036922297
+4036928912
+4036928913
+4036928914
+4036928915
+4036928916
+4036928917
+4036928918
+4036928919
+4036928920
+4036928921
+4036997040
+4036997041
+4036997042
+4036997043
+4036997044
+4036997045
+4036997046
+4036997047
+4036997048
+4036997049
14778759
14778760
14778761
-4036006560
-4036006561
-4036006562
-4036006563
-4036006564
-4036006565
-4036006566
-4036006567
-4036006568
-4036006569
14779024
14779025
14779026
14779031
14779032
14779033
+14786464
+14786465
+14786466
+14786467
+14786468
+14786469
+14786470
+14786471
+14786472
+14786473
+14786704
+14786705
+14786706
+14786707
+14786708
+14786709
+14786710
+14786711
+14786712
+14786713
+14787974
+14787975
+14787976
+14787977
+14787978
+14787979
+14787980
+14787981
+14787982
+14787983
+14788496
+14788497
+14788498
+14788499
+14788500
+14788501
+14788502
+14788503
+14788504
+14788505
+14789248
+14789249
+14789250
+14789251
+14789252
+14789253
+14789254
+14789255
+14789256
+14789257
+14789264
+14789265
+14789266
+14789267
+14789268
+14789269
+14789270
+14789271
+14789272
+14789273
+14790032
+14790033
+14790034
+14790035
+14790036
+14790037
+14790038
+14790039
+14790040
+14790041
+14790320
+14790321
+14790322
+14790323
+14790324
+14790325
+14790326
+14790327
+14790328
+14790329
+14791040
+14791041
+14791042
+14791043
+14791044
+14791045
+14791046
+14791047
+14791048
+14791049
+14791056
+14791057
+14791058
+14791059
+14791060
+14791061
+14791062
+14791063
+14791064
+14791065
+14845344
+14845345
+14845346
+14845347
+14845348
+14845349
+14845350
+14845351
+14845352
+14845353
+14845354
+14845355
+14845356
+14845357
+14845358
+14845359
+14845360
+14845361
+14845362
+14845363
+14845364
+14845365
+14845366
+14845367
+14845368
+14845369
+14845370
+14845371
+14845372
+14845373
+14845374
+14845375
+14845568
+14845569
+14845570
+14845571
+14845572
+14845573
+14845574
+14845575
+14845576
+15374496
+15374497
+15374498
+15374499
+15374500
+15374501
+15374502
+15374503
+15374504
+15374505
+15377296
+15377297
+15377298
+15377299
+15377300
+15377301
+15377302
+15377303
+15377304
+15377305
+15377536
+15377537
+15377538
+15377539
+15377540
+15377541
+15377542
+15377543
+15377544
+15377545
+15378320
+15378321
+15378322
+15378323
+15378324
+15378325
+15378326
+15378327
+15378328
+15378329
+15378352
+15378353
+15378354
+15378355
+15378356
+15378357
+15378358
+15378359
+15378360
+15378361
+15378832
+15378833
+15378834
+15378835
+15378836
+15378837
+15378838
+15378839
+15378840
+15378841
+15380400
+15380401
+15380402
+15380403
+15380404
+15380405
+15380406
+15380407
+15380408
+15380409
+15711376
+15711377
+15711378
+15711379
+15711380
+15711381
+15711382
+15711383
+15711384
+15711385
+4036006560
+4036006561
+4036006562
+4036006563
+4036006564
+4036006565
+4036006566
+4036006567
+4036006568
+4036006569
4036015280
4036015281
4036015282
4036405671
4036405672
4036405673
+4036406144
+4036406145
+4036406146
+4036406147
+4036406148
+4036406149
+4036406150
+4036406151
+4036406152
+4036406153
4036406672
4036406673
4036406674
4036406679
4036406680
4036406681
-14786464
-14786465
-14786466
-14786467
-14786468
-14786469
-14786470
-14786471
-14786472
-14786473
-14786704
-14786705
-14786706
-14786707
-14786708
-14786709
-14786710
-14786711
-14786712
-14786713
-14787974
-14787975
-14787976
-14787977
-14787978
-14787979
-14787980
-14787981
-14787982
-14787983
-14788496
-14788497
-14788498
-14788499
-14788500
-14788501
-14788502
-14788503
-14788504
-14788505
-14789248
-14789249
-14789250
-14789251
-14789252
-14789253
-14789254
-14789255
-14789256
-14789257
-14789264
-14789265
-14789266
-14789267
-14789268
-14789269
-14789270
-14789271
-14789272
-14789273
-14790032
-14790033
-14790034
-14790035
-14790036
-14790037
-14790038
-14790039
-14790040
-14790041
-14790320
-14790321
-14790322
-14790323
-14790324
-14790325
-14790326
-14790327
-14790328
-14790329
-14791040
-14791041
-14791042
-14791043
-14791044
-14791045
-14791046
-14791047
-14791048
-14791049
-14791056
-14791057
-14791058
-14791059
-14791060
-14791061
-14791062
-14791063
-14791064
-14791065
4036861838
4036861839
4036861840
4036997047
4036997048
4036997049
-15374496
-15374497
-15374498
-15374499
-15374500
-15374501
-15374502
-15374503
-15374504
-15374505
-15377296
-15377297
-15377298
-15377299
-15377300
-15377301
-15377302
-15377303
-15377304
-15377305
-15377536
-15377537
-15377538
-15377539
-15377540
-15377541
-15377542
-15377543
-15377544
-15377545
-15378320
-15378321
-15378322
-15378323
-15378324
-15378325
-15378326
-15378327
-15378328
-15378329
-15378352
-15378353
-15378354
-15378355
-15378356
-15378357
-15378358
-15378359
-15378360
-15378361
-15378832
-15378833
-15378834
-15378835
-15378836
-15378837
-15378838
-15378839
-15378840
-15378841
-15380400
-15380401
-15380402
-15380403
-15380404
-15380405
-15380406
-15380407
-15380408
-15380409
-15711376
-15711377
-15711378
-15711379
-15711380
-15711381
-15711382
-15711383
-15711384
-15711385
--- /dev/null
+U+0030
+U+0031
+U+0032
+U+0033
+U+0034
+U+0035
+U+0036
+U+0037
+U+0038
+U+0039
+U+0660
+U+0661
+U+0662
+U+0663
+U+0664
+U+0665
+U+0666
+U+0667
+U+0668
+U+0669
+U+06F0
+U+06F1
+U+06F2
+U+06F3
+U+06F4
+U+06F5
+U+06F6
+U+06F7
+U+06F8
+U+06F9
+U+07C0
+U+07C1
+U+07C2
+U+07C3
+U+07C4
+U+07C5
+U+07C6
+U+07C7
+U+07C8
+U+07C9
+U+0966
+U+0967
+U+0968
+U+0969
+U+096A
+U+096B
+U+096C
+U+096D
+U+096E
+U+096F
+U+09E6
+U+09E7
+U+09E8
+U+09E9
+U+09EA
+U+09EB
+U+09EC
+U+09ED
+U+09EE
+U+09EF
+U+0A66
+U+0A67
+U+0A68
+U+0A69
+U+0A6A
+U+0A6B
+U+0A6C
+U+0A6D
+U+0A6E
+U+0A6F
+U+0AE6
+U+0AE7
+U+0AE8
+U+0AE9
+U+0AEA
+U+0AEB
+U+0AEC
+U+0AED
+U+0AEE
+U+0AEF
+U+0B66
+U+0B67
+U+0B68
+U+0B69
+U+0B6A
+U+0B6B
+U+0B6C
+U+0B6D
+U+0B6E
+U+0B6F
+U+0BE6
+U+0BE7
+U+0BE8
+U+0BE9
+U+0BEA
+U+0BEB
+U+0BEC
+U+0BED
+U+0BEE
+U+0BEF
+U+0C66
+U+0C67
+U+0C68
+U+0C69
+U+0C6A
+U+0C6B
+U+0C6C
+U+0C6D
+U+0C6E
+U+0C6F
+U+0CE6
+U+0CE7
+U+0CE8
+U+0CE9
+U+0CEA
+U+0CEB
+U+0CEC
+U+0CED
+U+0CEE
+U+0CEF
+U+0D66
+U+0D67
+U+0D68
+U+0D69
+U+0D6A
+U+0D6B
+U+0D6C
+U+0D6D
+U+0D6E
+U+0D6F
+U+0DE6
+U+0DE7
+U+0DE8
+U+0DE9
+U+0DEA
+U+0DEB
+U+0DEC
+U+0DED
+U+0DEE
+U+0DEF
+U+0E50
+U+0E51
+U+0E52
+U+0E53
+U+0E54
+U+0E55
+U+0E56
+U+0E57
+U+0E58
+U+0E59
+U+0ED0
+U+0ED1
+U+0ED2
+U+0ED3
+U+0ED4
+U+0ED5
+U+0ED6
+U+0ED7
+U+0ED8
+U+0ED9
+U+0F20
+U+0F21
+U+0F22
+U+0F23
+U+0F24
+U+0F25
+U+0F26
+U+0F27
+U+0F28
+U+0F29
+U+1040
+U+1041
+U+1042
+U+1043
+U+1044
+U+1045
+U+1046
+U+1047
+U+1048
+U+1049
+U+1090
+U+1091
+U+1092
+U+1093
+U+1094
+U+1095
+U+1096
+U+1097
+U+1098
+U+1099
+U+17E0
+U+17E1
+U+17E2
+U+17E3
+U+17E4
+U+17E5
+U+17E6
+U+17E7
+U+17E8
+U+17E9
+U+1810
+U+1811
+U+1812
+U+1813
+U+1814
+U+1815
+U+1816
+U+1817
+U+1818
+U+1819
+U+1946
+U+1947
+U+1948
+U+1949
+U+194A
+U+194B
+U+194C
+U+194D
+U+194E
+U+194F
+U+19D0
+U+19D1
+U+19D2
+U+19D3
+U+19D4
+U+19D5
+U+19D6
+U+19D7
+U+19D8
+U+19D9
+U+1A80
+U+1A81
+U+1A82
+U+1A83
+U+1A84
+U+1A85
+U+1A86
+U+1A87
+U+1A88
+U+1A89
+U+1A90
+U+1A91
+U+1A92
+U+1A93
+U+1A94
+U+1A95
+U+1A96
+U+1A97
+U+1A98
+U+1A99
+U+1B50
+U+1B51
+U+1B52
+U+1B53
+U+1B54
+U+1B55
+U+1B56
+U+1B57
+U+1B58
+U+1B59
+U+1BB0
+U+1BB1
+U+1BB2
+U+1BB3
+U+1BB4
+U+1BB5
+U+1BB6
+U+1BB7
+U+1BB8
+U+1BB9
+U+1C40
+U+1C41
+U+1C42
+U+1C43
+U+1C44
+U+1C45
+U+1C46
+U+1C47
+U+1C48
+U+1C49
+U+1C50
+U+1C51
+U+1C52
+U+1C53
+U+1C54
+U+1C55
+U+1C56
+U+1C57
+U+1C58
+U+1C59
+U+2160
+U+2161
+U+2162
+U+2163
+U+2164
+U+2165
+U+2166
+U+2167
+U+2168
+U+2169
+U+216A
+U+216B
+U+216C
+U+216D
+U+216E
+U+216F
+U+2170
+U+2171
+U+2172
+U+2173
+U+2174
+U+2175
+U+2176
+U+2177
+U+2178
+U+2179
+U+217A
+U+217B
+U+217C
+U+217D
+U+217E
+U+217F
+U+2180
+U+2181
+U+2182
+U+2183
+U+2184
+U+2185
+U+2186
+U+2187
+U+2188
+U+A620
+U+A621
+U+A622
+U+A623
+U+A624
+U+A625
+U+A626
+U+A627
+U+A628
+U+A629
+U+A8D0
+U+A8D1
+U+A8D2
+U+A8D3
+U+A8D4
+U+A8D5
+U+A8D6
+U+A8D7
+U+A8D8
+U+A8D9
+U+A900
+U+A901
+U+A902
+U+A903
+U+A904
+U+A905
+U+A906
+U+A907
+U+A908
+U+A909
+U+A9D0
+U+A9D1
+U+A9D2
+U+A9D3
+U+A9D4
+U+A9D5
+U+A9D6
+U+A9D7
+U+A9D8
+U+A9D9
+U+A9F0
+U+A9F1
+U+A9F2
+U+A9F3
+U+A9F4
+U+A9F5
+U+A9F6
+U+A9F7
+U+A9F8
+U+A9F9
+U+AA50
+U+AA51
+U+AA52
+U+AA53
+U+AA54
+U+AA55
+U+AA56
+U+AA57
+U+AA58
+U+AA59
+U+ABF0
+U+ABF1
+U+ABF2
+U+ABF3
+U+ABF4
+U+ABF5
+U+ABF6
+U+ABF7
+U+ABF8
+U+ABF9
+U+FF10
+U+FF11
+U+FF12
+U+FF13
+U+FF14
+U+FF15
+U+FF16
+U+FF17
+U+FF18
+U+FF19
+U+104A0
+U+104A1
+U+104A2
+U+104A3
+U+104A4
+U+104A5
+U+104A6
+U+104A7
+U+104A8
+U+104A9
+U+10D30
+U+10D31
+U+10D32
+U+10D33
+U+10D34
+U+10D35
+U+10D36
+U+10D37
+U+10D38
+U+10D39
+U+11066
+U+11067
+U+11068
+U+11069
+U+1106A
+U+1106B
+U+1106C
+U+1106D
+U+1106E
+U+1106F
+U+110F0
+U+110F1
+U+110F2
+U+110F3
+U+110F4
+U+110F5
+U+110F6
+U+110F7
+U+110F8
+U+110F9
+U+11136
+U+11137
+U+11138
+U+11139
+U+1113A
+U+1113B
+U+1113C
+U+1113D
+U+1113E
+U+1113F
+U+111D0
+U+111D1
+U+111D2
+U+111D3
+U+111D4
+U+111D5
+U+111D6
+U+111D7
+U+111D8
+U+111D9
+U+112F0
+U+112F1
+U+112F2
+U+112F3
+U+112F4
+U+112F5
+U+112F6
+U+112F7
+U+112F8
+U+112F9
+U+11450
+U+11451
+U+11452
+U+11453
+U+11454
+U+11455
+U+11456
+U+11457
+U+11458
+U+11459
+U+114D0
+U+114D1
+U+114D2
+U+114D3
+U+114D4
+U+114D5
+U+114D6
+U+114D7
+U+114D8
+U+114D9
+U+11650
+U+11651
+U+11652
+U+11653
+U+11654
+U+11655
+U+11656
+U+11657
+U+11658
+U+11659
+U+116C0
+U+116C1
+U+116C2
+U+116C3
+U+116C4
+U+116C5
+U+116C6
+U+116C7
+U+116C8
+U+116C9
+U+11730
+U+11731
+U+11732
+U+11733
+U+11734
+U+11735
+U+11736
+U+11737
+U+11738
+U+11739
+U+118E0
+U+118E1
+U+118E2
+U+118E3
+U+118E4
+U+118E5
+U+118E6
+U+118E7
+U+118E8
+U+118E9
+U+11950
+U+11951
+U+11952
+U+11953
+U+11954
+U+11955
+U+11956
+U+11957
+U+11958
+U+11959
+U+11C50
+U+11C51
+U+11C52
+U+11C53
+U+11C54
+U+11C55
+U+11C56
+U+11C57
+U+11C58
+U+11C59
+U+11D50
+U+11D51
+U+11D52
+U+11D53
+U+11D54
+U+11D55
+U+11D56
+U+11D57
+U+11D58
+U+11D59
+U+11DA0
+U+11DA1
+U+11DA2
+U+11DA3
+U+11DA4
+U+11DA5
+U+11DA6
+U+11DA7
+U+11DA8
+U+11DA9
+U+16A60
+U+16A61
+U+16A62
+U+16A63
+U+16A64
+U+16A65
+U+16A66
+U+16A67
+U+16A68
+U+16A69
+U+16AC0
+U+16AC1
+U+16AC2
+U+16AC3
+U+16AC4
+U+16AC5
+U+16AC6
+U+16AC7
+U+16AC8
+U+16AC9
+U+16B50
+U+16B51
+U+16B52
+U+16B53
+U+16B54
+U+16B55
+U+16B56
+U+16B57
+U+16B58
+U+16B59
+U+1D7CE
+U+1D7CF
+U+1D7D0
+U+1D7D1
+U+1D7D2
+U+1D7D3
+U+1D7D4
+U+1D7D5
+U+1D7D6
+U+1D7D7
+U+1D7D8
+U+1D7D9
+U+1D7DA
+U+1D7DB
+U+1D7DC
+U+1D7DD
+U+1D7DE
+U+1D7DF
+U+1D7E0
+U+1D7E1
+U+1D7E2
+U+1D7E3
+U+1D7E4
+U+1D7E5
+U+1D7E6
+U+1D7E7
+U+1D7E8
+U+1D7E9
+U+1D7EA
+U+1D7EB
+U+1D7EC
+U+1D7ED
+U+1D7EE
+U+1D7EF
+U+1D7F0
+U+1D7F1
+U+1D7F2
+U+1D7F3
+U+1D7F4
+U+1D7F5
+U+1D7F6
+U+1D7F7
+U+1D7F8
+U+1D7F9
+U+1D7FA
+U+1D7FB
+U+1D7FC
+U+1D7FD
+U+1D7FE
+U+1D7FF
+U+1E140
+U+1E141
+U+1E142
+U+1E143
+U+1E144
+U+1E145
+U+1E146
+U+1E147
+U+1E148
+U+1E149
+U+1E2F0
+U+1E2F1
+U+1E2F2
+U+1E2F3
+U+1E2F4
+U+1E2F5
+U+1E2F6
+U+1E2F7
+U+1E2F8
+U+1E2F9
+U+1E950
+U+1E951
+U+1E952
+U+1E953
+U+1E954
+U+1E955
+U+1E956
+U+1E957
+U+1E958
+U+1E959
+U+1FBF0
+U+1FBF1
+U+1FBF2
+U+1FBF3
+U+1FBF4
+U+1FBF5
+U+1FBF6
+U+1FBF7
+U+1FBF8
+U+1FBF9
-U+0030
-U+0031
-U+0032
-U+0033
-U+0034
-U+0035
-U+0036
-U+0037
-U+0038
-U+0039
-U+0660
-U+0661
-U+0662
-U+0663
-U+0664
-U+0665
-U+0666
-U+0667
-U+0668
-U+0669
-U+06F0
-U+06F1
-U+06F2
-U+06F3
-U+06F4
-U+06F5
-U+06F6
-U+06F7
-U+06F8
-U+06F9
-U+07C0
-U+07C1
-U+07C2
-U+07C3
-U+07C4
-U+07C5
-U+07C6
-U+07C7
-U+07C8
-U+07C9
-U+0966
-U+0967
-U+0968
-U+0969
-U+096A
-U+096B
-U+096C
-U+096D
-U+096E
-U+096F
-U+09E6
-U+09E7
-U+09E8
-U+09E9
-U+09EA
-U+09EB
-U+09EC
-U+09ED
-U+09EE
-U+09EF
-U+0A66
-U+0A67
-U+0A68
-U+0A69
-U+0A6A
-U+0A6B
-U+0A6C
-U+0A6D
-U+0A6E
-U+0A6F
-U+0AE6
-U+0AE7
-U+0AE8
-U+0AE9
-U+0AEA
-U+0AEB
-U+0AEC
-U+0AED
-U+0AEE
-U+0AEF
-U+0B66
-U+0B67
-U+0B68
-U+0B69
-U+0B6A
-U+0B6B
-U+0B6C
-U+0B6D
-U+0B6E
-U+0B6F
-U+0BE6
-U+0BE7
-U+0BE8
-U+0BE9
-U+0BEA
-U+0BEB
-U+0BEC
-U+0BED
-U+0BEE
-U+0BEF
-U+0C66
-U+0C67
-U+0C68
-U+0C69
-U+0C6A
-U+0C6B
-U+0C6C
-U+0C6D
-U+0C6E
-U+0C6F
-U+0CE6
-U+0CE7
-U+0CE8
-U+0CE9
-U+0CEA
-U+0CEB
-U+0CEC
-U+0CED
-U+0CEE
-U+0CEF
-U+0D66
-U+0D67
-U+0D68
-U+0D69
-U+0D6A
-U+0D6B
-U+0D6C
-U+0D6D
-U+0D6E
-U+0D6F
-U+0DE6
-U+0DE7
-U+0DE8
-U+0DE9
-U+0DEA
-U+0DEB
-U+0DEC
-U+0DED
-U+0DEE
-U+0DEF
-U+0E50
-U+0E51
-U+0E52
-U+0E53
-U+0E54
-U+0E55
-U+0E56
-U+0E57
-U+0E58
-U+0E59
-U+0ED0
-U+0ED1
-U+0ED2
-U+0ED3
-U+0ED4
-U+0ED5
-U+0ED6
-U+0ED7
-U+0ED8
-U+0ED9
-U+0F20
-U+0F21
-U+0F22
-U+0F23
-U+0F24
-U+0F25
-U+0F26
-U+0F27
-U+0F28
-U+0F29
-U+1040
-U+1041
-U+1042
-U+1043
-U+1044
-U+1045
-U+1046
-U+1047
-U+1048
-U+1049
+U+0030
+U+0031
+U+0032
+U+0033
+U+0034
+U+0035
+U+0036
+U+0037
+U+0038
+U+0039
+U+0660
+U+0661
+U+0662
+U+0663
+U+0664
+U+0665
+U+0666
+U+0667
+U+0668
+U+0669
+U+06F0
+U+06F1
+U+06F2
+U+06F3
+U+06F4
+U+06F5
+U+06F6
+U+06F7
+U+06F8
+U+06F9
+U+07C0
+U+07C1
+U+07C2
+U+07C3
+U+07C4
+U+07C5
+U+07C6
+U+07C7
+U+07C8
+U+07C9
+U+0966
+U+0967
+U+0968
+U+0969
+U+096A
+U+096B
+U+096C
+U+096D
+U+096E
+U+096F
+U+09E6
+U+09E7
+U+09E8
+U+09E9
+U+09EA
+U+09EB
+U+09EC
+U+09ED
+U+09EE
+U+09EF
+U+0A66
+U+0A67
+U+0A68
+U+0A69
+U+0A6A
+U+0A6B
+U+0A6C
+U+0A6D
+U+0A6E
+U+0A6F
+U+0AE6
+U+0AE7
+U+0AE8
+U+0AE9
+U+0AEA
+U+0AEB
+U+0AEC
+U+0AED
+U+0AEE
+U+0AEF
+U+0B66
+U+0B67
+U+0B68
+U+0B69
+U+0B6A
+U+0B6B
+U+0B6C
+U+0B6D
+U+0B6E
+U+0B6F
+U+0BE6
+U+0BE7
+U+0BE8
+U+0BE9
+U+0BEA
+U+0BEB
+U+0BEC
+U+0BED
+U+0BEE
+U+0BEF
+U+0C66
+U+0C67
+U+0C68
+U+0C69
+U+0C6A
+U+0C6B
+U+0C6C
+U+0C6D
+U+0C6E
+U+0C6F
+U+0CE6
+U+0CE7
+U+0CE8
+U+0CE9
+U+0CEA
+U+0CEB
+U+0CEC
+U+0CED
+U+0CEE
+U+0CEF
+U+0D66
+U+0D67
+U+0D68
+U+0D69
+U+0D6A
+U+0D6B
+U+0D6C
+U+0D6D
+U+0D6E
+U+0D6F
+U+0DE6
+U+0DE7
+U+0DE8
+U+0DE9
+U+0DEA
+U+0DEB
+U+0DEC
+U+0DED
+U+0DEE
+U+0DEF
+U+0E50
+U+0E51
+U+0E52
+U+0E53
+U+0E54
+U+0E55
+U+0E56
+U+0E57
+U+0E58
+U+0E59
+U+0ED0
+U+0ED1
+U+0ED2
+U+0ED3
+U+0ED4
+U+0ED5
+U+0ED6
+U+0ED7
+U+0ED8
+U+0ED9
+U+0F20
+U+0F21
+U+0F22
+U+0F23
+U+0F24
+U+0F25
+U+0F26
+U+0F27
+U+0F28
+U+0F29
+U+1040
+U+1041
+U+1042
+U+1043
+U+1044
+U+1045
+U+1046
+U+1047
+U+1048
+U+1049
+U+1090
+U+1091
+U+1092
+U+1093
+U+1094
+U+1095
+U+1096
+U+1097
+U+1098
+U+1099
+U+17E0
+U+17E1
+U+17E2
+U+17E3
+U+17E4
+U+17E5
+U+17E6
+U+17E7
+U+17E8
+U+17E9
+U+1810
+U+1811
+U+1812
+U+1813
+U+1814
+U+1815
+U+1816
+U+1817
+U+1818
+U+1819
+U+1946
+U+1947
+U+1948
+U+1949
+U+194A
+U+194B
+U+194C
+U+194D
+U+194E
+U+194F
+U+19D0
+U+19D1
+U+19D2
+U+19D3
+U+19D4
+U+19D5
+U+19D6
+U+19D7
+U+19D8
+U+19D9
+U+1A80
+U+1A81
+U+1A82
+U+1A83
+U+1A84
+U+1A85
+U+1A86
+U+1A87
+U+1A88
+U+1A89
+U+1A90
+U+1A91
+U+1A92
+U+1A93
+U+1A94
+U+1A95
+U+1A96
+U+1A97
+U+1A98
+U+1A99
+U+1B50
+U+1B51
+U+1B52
+U+1B53
+U+1B54
+U+1B55
+U+1B56
+U+1B57
+U+1B58
+U+1B59
+U+1BB0
+U+1BB1
+U+1BB2
+U+1BB3
+U+1BB4
+U+1BB5
+U+1BB6
+U+1BB7
+U+1BB8
+U+1BB9
+U+1C40
+U+1C41
+U+1C42
+U+1C43
+U+1C44
+U+1C45
+U+1C46
+U+1C47
+U+1C48
+U+1C49
+U+1C50
+U+1C51
+U+1C52
+U+1C53
+U+1C54
+U+1C55
+U+1C56
+U+1C57
+U+1C58
+U+1C59
+U+2160
+U+2161
+U+2162
+U+2163
+U+2164
+U+2165
+U+2166
+U+2167
+U+2168
+U+2169
+U+216A
+U+216B
+U+216C
+U+216D
+U+216E
+U+216F
+U+2170
+U+2171
+U+2172
+U+2173
+U+2174
+U+2175
+U+2176
+U+2177
+U+2178
+U+2179
+U+217A
+U+217B
+U+217C
+U+217D
+U+217E
+U+217F
+U+2180
+U+2181
+U+2182
+U+2183
+U+2184
+U+2185
+U+2186
+U+2187
+U+2188
+U+A620
+U+A621
+U+A622
+U+A623
+U+A624
+U+A625
+U+A626
+U+A627
+U+A628
+U+A629
+U+A8D0
+U+A8D1
+U+A8D2
+U+A8D3
+U+A8D4
+U+A8D5
+U+A8D6
+U+A8D7
+U+A8D8
+U+A8D9
+U+A900
+U+A901
+U+A902
+U+A903
+U+A904
+U+A905
+U+A906
+U+A907
+U+A908
+U+A909
+U+A9D0
+U+A9D1
+U+A9D2
+U+A9D3
+U+A9D4
+U+A9D5
+U+A9D6
+U+A9D7
+U+A9D8
+U+A9D9
+U+A9F0
+U+A9F1
+U+A9F2
+U+A9F3
+U+A9F4
+U+A9F5
+U+A9F6
+U+A9F7
+U+A9F8
+U+A9F9
+U+AA50
+U+AA51
+U+AA52
+U+AA53
+U+AA54
+U+AA55
+U+AA56
+U+AA57
+U+AA58
+U+AA59
+U+ABF0
+U+ABF1
+U+ABF2
+U+ABF3
+U+ABF4
+U+ABF5
+U+ABF6
+U+ABF7
+U+ABF8
+U+ABF9
+U+FF10
+U+FF11
+U+FF12
+U+FF13
+U+FF14
+U+FF15
+U+FF16
+U+FF17
+U+FF18
+U+FF19
U+104A0
U+104A1
U+104A2
U+104A7
U+104A8
U+104A9
-U+1090
-U+1091
-U+1092
-U+1093
-U+1094
-U+1095
-U+1096
-U+1097
-U+1098
-U+1099
U+10D30
U+10D31
U+10D32
U+16A67
U+16A68
U+16A69
+U+16AC0
+U+16AC1
+U+16AC2
+U+16AC3
+U+16AC4
+U+16AC5
+U+16AC6
+U+16AC7
+U+16AC8
+U+16AC9
U+16B50
U+16B51
U+16B52
U+16B57
U+16B58
U+16B59
-U+17E0
-U+17E1
-U+17E2
-U+17E3
-U+17E4
-U+17E5
-U+17E6
-U+17E7
-U+17E8
-U+17E9
-U+1810
-U+1811
-U+1812
-U+1813
-U+1814
-U+1815
-U+1816
-U+1817
-U+1818
-U+1819
-U+1946
-U+1947
-U+1948
-U+1949
-U+194A
-U+194B
-U+194C
-U+194D
-U+194E
-U+194F
-U+19D0
-U+19D1
-U+19D2
-U+19D3
-U+19D4
-U+19D5
-U+19D6
-U+19D7
-U+19D8
-U+19D9
-U+1A80
-U+1A81
-U+1A82
-U+1A83
-U+1A84
-U+1A85
-U+1A86
-U+1A87
-U+1A88
-U+1A89
-U+1A90
-U+1A91
-U+1A92
-U+1A93
-U+1A94
-U+1A95
-U+1A96
-U+1A97
-U+1A98
-U+1A99
-U+1B50
-U+1B51
-U+1B52
-U+1B53
-U+1B54
-U+1B55
-U+1B56
-U+1B57
-U+1B58
-U+1B59
-U+1BB0
-U+1BB1
-U+1BB2
-U+1BB3
-U+1BB4
-U+1BB5
-U+1BB6
-U+1BB7
-U+1BB8
-U+1BB9
-U+1C40
-U+1C41
-U+1C42
-U+1C43
-U+1C44
-U+1C45
-U+1C46
-U+1C47
-U+1C48
-U+1C49
-U+1C50
-U+1C51
-U+1C52
-U+1C53
-U+1C54
-U+1C55
-U+1C56
-U+1C57
-U+1C58
-U+1C59
U+1D7CE
U+1D7CF
U+1D7D0
U+1FBF7
U+1FBF8
U+1FBF9
-U+A620
-U+A621
-U+A622
-U+A623
-U+A624
-U+A625
-U+A626
-U+A627
-U+A628
-U+A629
-U+A8D0
-U+A8D1
-U+A8D2
-U+A8D3
-U+A8D4
-U+A8D5
-U+A8D6
-U+A8D7
-U+A8D8
-U+A8D9
-U+A900
-U+A901
-U+A902
-U+A903
-U+A904
-U+A905
-U+A906
-U+A907
-U+A908
-U+A909
-U+A9D0
-U+A9D1
-U+A9D2
-U+A9D3
-U+A9D4
-U+A9D5
-U+A9D6
-U+A9D7
-U+A9D8
-U+A9D9
-U+A9F0
-U+A9F1
-U+A9F2
-U+A9F3
-U+A9F4
-U+A9F5
-U+A9F6
-U+A9F7
-U+A9F8
-U+A9F9
-U+AA50
-U+AA51
-U+AA52
-U+AA53
-U+AA54
-U+AA55
-U+AA56
-U+AA57
-U+AA58
-U+AA59
-U+ABF0
-U+ABF1
-U+ABF2
-U+ABF3
-U+ABF4
-U+ABF5
-U+ABF6
-U+ABF7
-U+ABF8
-U+ABF9
-U+FF10
-U+FF11
-U+FF12
-U+FF13
-U+FF14
-U+FF15
-U+FF16
-U+FF17
-U+FF18
-U+FF19
--- /dev/null
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+50
+100
+500
+1000
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+50
+100
+500
+1000
+1000
+5000
+10000
+100
+100
+6
+50
+50000
+100000
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
return fopen("./data/tests/bytesequences/control-all.txt", "r");
}
+FILE *data__bytesequence_file_open__decimal(void) { // Opens the "decimal" bytesequence test data file for reading.
+
+ return fopen("./data/tests/bytesequences/decimal-all.txt", "r"); // Path is relative to the current working directory; the fopen() result is returned unchecked.
+}
+
FILE *data__bytesequence_file_open__digit(void) {
return fopen("./data/tests/bytesequences/digit-all.txt", "r");
return bytes;
}
+FILE *data__value_file_open__decimal(void) { // Opens the "decimal" values test data file for reading.
+
+ return fopen("./data/tests/values/decimal-all.txt", "r"); // Path is relative to the current working directory; the fopen() result is returned unchecked.
+}
+
+ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value) { // Reads one line from file and stores its base-10 number in value.
+
+ size_t length = 0;
+ char *line = 0; // A NULL line with a zero length makes getline() allocate the buffer itself.
+
+ const ssize_t bytes = getline(&line, &length, file);
+
+ if (bytes > 0) {
+ *value = (uint32_t) atol(line); // atol() performs no error detection; value is left untouched when no line was read.
+ }
+
+ if (line) {
+ free(line); // The buffer allocated by getline() is released here and never handed to the caller.
+ }
+
+ return bytes; // The unmodified getline() result: bytes read on success, -1 on failure or end of file.
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
extern FILE *data__bytesequence_file_open__control(void);
/**
+ * Open the "decimal" bytesequence file.
+ *
+ * This assumes the following:
+ * - The file path is relative to the current working directory (tests are run from project root).
+ * - The file path is "data/tests/bytesequences/decimal-all.txt".
+ *
+ * @return
+ * Non-zero on success.
+ * 0 on failure.
+ *
+ * @see fopen()
+ */
+extern FILE *data__bytesequence_file_open__decimal(void);
+
+/**
* Open the "digit" bytesequence file.
*
* This assumes the following:
*
* This should handle converting the number between big and little endian as needed.
*
- * The input file is expected to be in base-10 so that existing standarrd functions like atoll() can be easily used.
+ * The input file is expected to be in base-10 so that existing standard functions like atoll() can be easily used.
*
* @param file
* The file stream.
*
* @see atoll()
* @see getline()
- * @see htonl()
*/
extern ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character);
+/**
+ * Open the "decimal" values file.
+ *
+ * This assumes the following:
+ * - The file path is relative to the current working directory (tests are run from project root).
+ * - The file path is "data/tests/values/decimal-all.txt".
+ *
+ * @return
+ * Non-zero on success.
+ * 0 on failure.
+ *
+ * @see fopen()
+ */
+extern FILE *data__value_file_open__decimal(void);
+
+/**
+ * Simple line reader that converts the line into an unsigned 32-bit number.
+ *
+ * This assumes the following:
+ * - The line only contains base-10 digits as ASCII characters.
+ *
+ * The input file is expected to be in base-10 so that existing standard functions like atol() can be easily used.
+ *
+ * @param file
+ * The file stream.
+ * @param value
+ * The number read from the file at the current line in the stream.
+ *
+ * @return
+ * positive number on success where number represents bytes read.
+ * -1 on failure or when end of file is reached (the result of getline() is returned as-is).
+ *
+ * @see atol()
+ * @see getline()
+ */
+extern ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value);
+
#ifdef __cplusplus
} // extern "C"
#endif
--- /dev/null
+#include "test-utf.h"
+#include "test-utf-character_is_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void test__f_utf_character_is_decimal__works(void **state) { // Checks f_utf_character_is_decimal() against every sequence/value pair in the test data; state is not used.
+
+ {
+ FILE *file = data__bytesequence_file_open__decimal();
+ FILE *file_number = data__value_file_open__decimal();
+
+ assert_non_null(file);
+ assert_non_null(file_number);
+
+ f_utf_char_t sequence = 0;
+ ssize_t bytes = 0;
+ ssize_t bytes_number = 0;
+ uint32_t number = 0;
+ f_array_length_t line = 0; // Counts loop iterations; it is only incremented, never read, in this function.
+
+ do {
+ bytes = data__bytesequence_get_line(file, &sequence); // Both files are read in lock-step: one sequence and one expected number per iteration.
+ bytes_number = data__value_get_line_long_long(file_number, &number);
+
+ if (bytes > 0 && bytes_number > 0) {
+ uint32_t value = F_type_size_max_32_unsigned_d; // NOTE(review): presumably a sentinel that no expected number equals - confirm.
+
+ const f_status_t status = f_utf_character_is_decimal(sequence, &value);
+
+ assert_int_equal(status, F_true);
+ assert_int_equal(value, number);
+ }
+
+ ++line;
+
+ } while (bytes > 0 && bytes_number > 0); // Stops as soon as either file runs out; a length mismatch between the two files is not reported.
+
+ fclose(file);
+ fclose(file_number);
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Test the function in the utf project.
+ */
+#ifndef _TEST__F_utf_character_is_decimal_h
+#define _TEST__F_utf_character_is_decimal_h
+
+/**
+ * Test that the function works.
+ *
+ * @see f_utf_character_is_decimal()
+ */
+extern void test__f_utf_character_is_decimal__works(void **state);
+
+#endif // _TEST__F_utf_character_is_decimal_h
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
--- /dev/null
+#include "test-utf.h"
+#include "test-utf-is_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void test__f_utf_is_decimal__works(void **state) { // Checks f_utf_is_decimal() against every sequence/value pair in the test data; state is not used.
+
+ {
+ FILE *file = data__bytesequence_file_open__decimal();
+ FILE *file_number = data__value_file_open__decimal();
+
+ assert_non_null(file);
+ assert_non_null(file_number);
+
+ f_utf_char_t sequence = 0;
+ ssize_t bytes = 0;
+ ssize_t bytes_number = 0;
+ uint32_t number = 0;
+ f_array_length_t line = 0; // Counts loop iterations; it is only incremented, never read, in this function.
+
+ do {
+ bytes = data__bytesequence_get_line(file, &sequence); // Both files are read in lock-step: one sequence and one expected number per iteration.
+ bytes_number = data__value_get_line_long_long(file_number, &number);
+
+ if (bytes > 0 && bytes_number > 0) {
+ const uint8_t width = macro_f_utf_char_t_width(sequence);
+ char buffer[5] = { 0, 0, 0, 0, 0 }; // At most indexes 0-3 are written below, so the last byte always stays zero.
+
+ buffer[0] = macro_f_utf_char_t_to_char_1(sequence);
+
+ if (width > 1) { // Copy only as many bytes as the reported width of the sequence.
+ buffer[1] = macro_f_utf_char_t_to_char_2(sequence);
+
+ if (width > 2) {
+ buffer[2] = macro_f_utf_char_t_to_char_3(sequence);
+
+ if (width > 3) {
+ buffer[3] = macro_f_utf_char_t_to_char_4(sequence);
+ }
+ }
+ }
+
+ uint32_t value = F_type_size_max_32_unsigned_d; // NOTE(review): presumably a sentinel that no expected number equals - confirm.
+
+ const f_status_t status = f_utf_is_decimal(buffer, 5, &value); // The second argument is the full size of buffer, including the unused zero bytes.
+
+ assert_int_equal(status, F_true);
+ assert_int_equal(value, number);
+ }
+
+ ++line;
+
+ } while (bytes > 0 && bytes_number > 0); // Stops as soon as either file runs out; a length mismatch between the two files is not reported.
+
+ fclose(file);
+ fclose(file_number);
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Test the function in the utf project.
+ */
+#ifndef _TEST__F_utf_is_decimal_h
+#define _TEST__F_utf_is_decimal_h
+
+/**
+ * Test that the function works.
+ *
+ * @see f_utf_is_decimal()
+ */
+extern void test__f_utf_is_decimal__works(void **state);
+
+#endif // _TEST__F_utf_is_decimal_h
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
do {
bytes = data__bytesequence_get_line(file, &sequence);
- if (bytes) {
+ if (bytes > 0) {
const uint8_t width = macro_f_utf_char_t_width(sequence);
char buffer[5] = { 0, 0, 0, 0, 0 };
cmocka_unit_test(test__f_utf_append_nulless__works),
//cmocka_unit_test(test__f_utf_character_is_alphabetic__works),
+ //cmocka_unit_test(test__f_utf_character_is_alphabetic_decimal__works),
+ //cmocka_unit_test(test__f_utf_character_is_alphabetic_digit__works),
+ //cmocka_unit_test(test__f_utf_character_is_alphabetic_numeric__works),
cmocka_unit_test(test__f_utf_character_is_combining__works),
cmocka_unit_test(test__f_utf_character_is_control__works),
+ cmocka_unit_test(test__f_utf_character_is_decimal__works),
cmocka_unit_test(test__f_utf_character_is_digit__works),
cmocka_unit_test(test__f_utf_character_is_emoji__works),
//cmocka_unit_test(test__f_utf_character_is_numeric__works),
cmocka_unit_test(test__f_utf_dynamicss_resize__works),
//cmocka_unit_test(test__f_utf_is_alphabetic__works),
+ //cmocka_unit_test(test__f_utf_is_alphabetic_decimal__works),
+ //cmocka_unit_test(test__f_utf_is_alphabetic_digit__works),
+ //cmocka_unit_test(test__f_utf_is_alphabetic_numeric__works),
cmocka_unit_test(test__f_utf_is_combining__works),
cmocka_unit_test(test__f_utf_is_control__works),
+ cmocka_unit_test(test__f_utf_is_decimal__works),
cmocka_unit_test(test__f_utf_is_digit__works),
cmocka_unit_test(test__f_utf_is_emoji__works),
//cmocka_unit_test(test__f_utf_is_numeric__works),
#include "test-utf-character_is_alphabetic.h"
#include "test-utf-character_is_combining.h"
#include "test-utf-character_is_control.h"
+#include "test-utf-character_is_decimal.h"
#include "test-utf-character_is_digit.h"
#include "test-utf-character_is_emoji.h"
#include "test-utf-character_is_numeric.h"
#include "test-utf-is_alphabetic.h"
#include "test-utf-is_combining.h"
#include "test-utf-is_control.h"
+#include "test-utf-is_decimal.h"
#include "test-utf-is_digit.h"
#include "test-utf-is_emoji.h"
#include "test-utf-is_numeric.h"
if (name.string[i] == '_') continue;
- status = f_utf_is_alphabetic_digit(name.string, name.used);
+ status = f_utf_is_alphabetic_decimal(name.string, name.used);
if (F_status_is_error(status)) return status;
if (status == F_false) return F_false;
* F_none if there is no string to validate (used = 0).
*
* Errors (with error bit) from: f_utf_is_alphabetic().
- * Errors (with error bit) from: f_utf_is_alphabetic_digit().
+ * Errors (with error bit) from: f_utf_is_alphabetic_decimal().
*
* @see f_utf_is_alphabetic()
- * @see f_utf_is_alphabetic_digit()
+ * @see f_utf_is_alphabetic_decimal()
*/
#ifndef _di_controller_validate_define_name_
extern f_status_t controller_validate_environment_name(const f_string_static_t name) F_attribute_visibility_internal_d;