Move the relevant functions into utf/convert.h, utf/is.h, and utf/is_character.h.
Implement f_utf_t as a type of uint32_t.
This should allow for more customization of that type if need be.
It also provides a more explicit context.
Add a 'u' suffix to designate that the hexadecimal digits are unsigned (more specifically, this likely means each value is an unsigned int).
This might be removed if it becomes a problem on architectures where "u" represents 16 bits or fewer.
Due to the size of the work, I am avoiding making this change in all of the Unicode comparison functions.
There are quite a lot of hexadecimal digits in use.
build_sources_library string.c private-string.c string/common.c string/private-dynamic.c string/private-map.c string/private-map_multi.c string/private-quantity.c string/private-range.c string/private-triple.c string/dynamic.c string/map.c string/map_multi.c string/quantity.c string/range.c string/static.c string/triple.c
build_sources_library type_array/array_length.c type_array/cell.c type_array/fll_id.c type_array/int8.c type_array/int16.c type_array/int32.c type_array/int64.c type_array/int128.c type_array/state.c type_array/status.c type_array/uint8.c type_array/uint16.c type_array/uint32.c type_array/uint64.c type_array/uint128.c
build_sources_library type_array/private-array_length.c type_array/private-cell.c type_array/private-fll_id.c type_array/private-int8.c type_array/private-int16.c type_array/private-int32.c type_array/private-int64.c type_array/private-int128.c type_array/private-state.c type_array/private-status.c type_array/private-uint8.c type_array/private-uint16.c type_array/private-uint32.c type_array/private-uint64.c type_array/private-uint128.c
-build_sources_library utf.c private-utf.c utf/common.c utf/dynamic.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
+build_sources_library utf.c private-utf.c utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
build_sources_library-level thread.c private-thread.c
build_sources_library_shared
build_sources_library_static
build_sources_headers type.h
build_sources_headers type_array.h type_array/common.h
build_sources_headers type_array/array_length.h type_array/cell.h type_array/fll_id.h type_array/int8.h type_array/int16.h type_array/int32.h type_array/int64.h type_array/int128.h type_array/state.h type_array/status.h type_array/uint8.h type_array/uint16.h type_array/uint32.h type_array/uint64.h type_array/uint128.h
-build_sources_headers utf.h utf/common.h utf/dynamic.h utf/map.h utf/string.h utf/triple.h
+build_sources_headers utf.h utf/common.h utf/convert.h utf/dynamic.h utf/is.h utf/is_character.h utf/map.h utf/string.h utf/triple.h
build_sources_headers-level thread.h thread/common.h
build_sources_headers_shared
build_sources_headers_static
build_sources_library level_0/string.c level_0/private-string.c level_0/string/common.c level_0/string/private-dynamic.c level_0/string/private-map.c level_0/string/private-map_multi.c level_0/string/private-quantity.c level_0/string/private-range.c level_0/string/private-triple.c level_0/string/dynamic.c level_0/string/map.c level_0/string/map_multi.c level_0/string/quantity.c level_0/string/range.c level_0/string/static.c level_0/string/triple.c
build_sources_library level_0/type_array/array_length.c level_0/type_array/cell.c level_0/type_array/fll_id.c level_0/type_array/int8.c level_0/type_array/int16.c level_0/type_array/int32.c level_0/type_array/int64.c level_0/type_array/int128.c level_0/type_array/state.c level_0/type_array/status.c level_0/type_array/uint8.c level_0/type_array/uint16.c level_0/type_array/uint32.c level_0/type_array/uint64.c level_0/type_array/uint128.c
build_sources_library level_0/type_array/private-array_length.c level_0/type_array/private-cell.c level_0/type_array/private-fll_id.c level_0/type_array/private-int8.c level_0/type_array/private-int16.c level_0/type_array/private-int32.c level_0/type_array/private-int64.c level_0/type_array/private-int128.c level_0/type_array/private-state.c level_0/type_array/private-status.c level_0/type_array/private-uint8.c level_0/type_array/private-uint16.c level_0/type_array/private-uint32.c level_0/type_array/private-uint64.c level_0/type_array/private-uint128.c
-build_sources_library level_0/utf.c level_0/private-utf.c level_0/utf/common.c level_0/utf/dynamic.c level_0/utf/map.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-is_unassigned.c level_0/utf/private-string.c
+build_sources_library level_0/utf.c level_0/private-utf.c level_0/utf/common.c level_0/utf/convert.c level_0/utf/dynamic.c level_0/utf/is.c level_0/utf/is_character.c level_0/utf/map.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-is_unassigned.c level_0/utf/private-string.c
build_sources_library level_1/control_group.c
build_sources_library level_1/conversion.c level_1/private-conversion.c
build_sources_library level_1/directory.c level_1/private-directory.c
build_sources_headers level_0/type.h
build_sources_headers level_0/type_array.h level_0/type_array/common.h
build_sources_headers level_0/type_array/array_length.h level_0/type_array/cell.h level_0/type_array/fll_id.h level_0/type_array/int8.h level_0/type_array/int16.h level_0/type_array/int32.h level_0/type_array/int64.h level_0/type_array/int128.h level_0/type_array/state.h level_0/type_array/status.h level_0/type_array/uint8.h level_0/type_array/uint16.h level_0/type_array/uint32.h level_0/type_array/uint64.h level_0/type_array/uint128.h
-build_sources_headers level_0/utf.h level_0/utf/common.h level_0/utf/dynamic.h level_0/utf/map.h level_0/utf/string.h level_0/utf/triple.h
+build_sources_headers level_0/utf.h level_0/utf/common.h level_0/utf/convert.h level_0/utf/dynamic.h level_0/utf/is.h level_0/utf/is_character.h level_0/utf/map.h level_0/utf/string.h level_0/utf/triple.h
build_sources_headers level_1/control_group.h
build_sources_headers level_1/conversion.h
build_sources_headers level_1/directory.h level_1/directory/common.h
}
#endif // _di_f_utf_buffer_increment_
-#ifndef _di_f_utf_char_to_character_
- f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- if (!character_utf) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_char_to_character(character, width_max, character_utf);
- }
-#endif // _di_f_utf_char_to_character_
-
-#ifndef _di_f_utf_character_is_
- f_status_t f_utf_character_is(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_utf_fragment;
- }
-
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_
-
-#ifndef _di_f_utf_character_is_alpha_
- f_status_t f_utf_character_is_alpha(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_alpha(character);
- }
-
- if (isalpha(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_alpha_
-
-#ifndef _di_f_utf_character_is_alpha_digit_
- f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_alpha_digit(character);
- }
-
- if (isalnum(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_alpha_digit_
-
-#ifndef _di_f_utf_character_is_alpha_numeric_
- f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_alpha_numeric(character);
- }
-
- if (isalnum(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_alpha_numeric_
-
-#ifndef _di_f_utf_character_is_ascii_
- f_status_t f_utf_character_is_ascii(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- return F_false;
- }
-
- return F_true;
- }
-#endif // _di_f_utf_character_is_ascii_
-
-#ifndef _di_f_utf_character_is_combining_
- f_status_t f_utf_character_is_combining(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_combining(character);
- }
-
- // There are no combining characters in ASCII.
- return F_false;
- }
-#endif // _di_f_utf_character_is_combining_
-
-#ifndef _di_f_utf_character_is_control_
- f_status_t f_utf_character_is_control(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_control(character);
- }
-
- if (iscntrl(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_control_
-
-#ifndef _di_f_utf_character_is_control_code_
- f_status_t f_utf_character_is_control_code(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_control_code(character);
- }
-
- if (iscntrl(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_control_code_
-
-#ifndef _di_f_utf_character_is_control_picture_
- f_status_t character_is_control_format(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_control_format(character);
- }
-
- // There are no control format characters in ASCII.
- return F_false;
- }
-#endif // _di_f_utf_character_is_control_format_
-
-#ifndef _di_f_utf_character_is_control_picture_
- f_status_t f_utf_character_is_control_picture(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_control_picture(character);
- }
-
- // There are no control picture characters in ASCII.
- return F_false;
- }
-#endif // _di_f_utf_character_is_control_picture_
-
-#ifndef _di_f_utf_character_is_digit_
- f_status_t f_utf_character_is_digit(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_digit(character);
- }
-
- if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_digit_
-
-#ifndef _di_f_utf_character_is_emoji_
- f_status_t f_utf_character_is_emoji(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_emoji(character);
- }
-
- if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_emoji_
-
-#ifndef _di_f_utf_character_is_fragment_
- f_status_t f_utf_character_is_fragment(const f_utf_character_t character) {
-
- return macro_f_utf_character_t_width_is(character) == 1;
- }
-#endif // _di_f_utf_character_is_fragment_
-
-#ifndef _di_f_utf_character_is_graph_
- f_status_t f_utf_character_is_graph(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- if (private_f_utf_character_is_control(character)) {
- return F_false;
- }
-
- if (private_f_utf_character_is_whitespace(character)) {
- return F_false;
- }
-
- if (private_f_utf_character_is_zero_width(character)) {
- return F_false;
- }
-
- return F_true;
- }
-
- if (isgraph(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_graph_
-
-#ifndef _di_f_utf_character_is_numeric_
- f_status_t f_utf_character_is_numeric(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_numeric(character);
- }
-
- if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_numeric_
-
-#ifndef _di_f_utf_character_is_phonetic_
- f_status_t f_utf_character_is_phonetic(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_phonetic(character);
- }
-
- // There are no ASCII phonetic characters.
- return F_false;
- }
-#endif // _di_f_utf_character_is_phonetic_
-
-#ifndef _di_f_utf_character_is_private_
- f_status_t f_utf_character_is_private(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_private(character);
- }
-
- // There are no ASCII private characters.
- return F_false;
- }
-#endif // _di_f_utf_character_is_phonetic_
-
-#ifndef _di_f_utf_character_is_punctuation_
- f_status_t f_utf_character_is_punctuation(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_punctuation(character);
- }
-
- // ASCII: '!' to '#'.
- if (character > 0x20000000 && character < 0x24000000) {
- return F_true;
- }
-
- // ASCII: '%' to '*'.
- if (character > 0x24000000 && character < 0x2b000000) {
- return F_true;
- }
-
- // ASCII: ',' to '/'.
- if (character > 0x2b000000 && character < 0x30000000) {
- return F_true;
- }
-
- // ASCII: ':', ';', '?', or '@'.
- if (character == 0x3a000000 || character == 0x3b000000 || character == 0x3f000000 || character == 0x40000000) {
- return F_true;
- }
-
- // ASCII: '[' to ']'.
- if (character > 0x5a000000 && character < 0x5d000000) {
- return F_true;
- }
-
- // ASCII: '_', '{', or '}'.
- if (character == 0x5f000000 || character == 0x7b000000 || character == 0x7d000000) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_punctuation_
-
-#ifndef _di_f_utf_character_is_symbol_
- f_status_t f_utf_character_is_symbol(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_symbol(character);
- }
-
- // ASCII: '$' or '+'.
- if (character == 0x24000000 || character == 0x2b000000) {
- return F_true;
- }
-
- // ASCII: '<' to '>'.
- if (character > 0x3c000000 && character < 0x3e000000) {
- return F_true;
- }
-
- // ASCII: '^', '`', '|', or '~'.
- if (character == 0x5e000000 || character == 0x60000000 || character == 0x7c000000 || character == 0x7e000000) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_symbol_
-
-#ifndef _di_f_utf_character_is_unassigned_
- f_status_t f_utf_character_is_unassigned(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_unassigned(character);
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_unassigned_
-
-#ifndef _di_f_utf_character_is_valid_
- f_status_t f_utf_character_is_valid(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_valid(character);
- }
-
- return F_true;
- }
-#endif // _di_f_utf_character_is_valid_
-
-#ifndef _di_f_utf_character_is_whitespace_
- f_status_t f_utf_character_is_whitespace(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_whitespace(character);
- }
-
- if (isspace(macro_f_utf_character_t_to_char_1(character))) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_whitespace_
-
-#ifndef _di_f_utf_character_is_whitespace_modifier_
- f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_whitespace_modifier(character);
- }
-
- // There are no ASCII whitespace modifiers.
- return F_false;
- }
-#endif // _di_f_utf_character_is_whitespace_modifier_
-
-#ifndef _di_f_utf_character_is_whitespace_other_
- f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_whitespace_other(character);
- }
-
- // There are no ASCII whitespace other.
- return F_false;
- }
-#endif // _di_f_utf_character_is_whitespace_other_
-
-#ifndef _di_f_utf_character_is_wide_
- f_status_t f_utf_character_is_wide(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_wide(character);
- }
-
- // There are no wide ASCII characters.
- return F_false;
- }
-#endif // _di_f_utf_character_is_wide_
-
-#ifndef _di_f_utf_character_is_word_
- f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_word(character, strict);
- }
-
- if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0]) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_word_
-
-#ifndef _di_f_utf_character_is_word_dash_
- f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_word_dash(character, strict);
- }
-
- if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0] || character == f_string_ascii_minus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_word_dash_
-
-#ifndef _di_f_utf_character_is_word_dash_plus_
- f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_word_dash_plus(character, strict);
- }
-
- if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0] || character == f_string_ascii_minus_s.string[0] || character == f_string_ascii_plus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_word_dash_plus_
-
-#ifndef _di_f_utf_character_is_zero_width_
- f_status_t f_utf_character_is_zero_width(const f_utf_character_t character) {
-
- if (macro_f_utf_character_t_width_is(character)) {
- if (macro_f_utf_character_t_width_is(character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return private_f_utf_character_is_zero_width(character);
- }
-
- const uint8_t ascii = macro_f_utf_character_t_to_char_1(character);
-
- // These control characters are considered zero-width spaces.
- if (ascii >= 0x00 && ascii <= 0x08) {
- return F_true;
- }
- else if (ascii == 0x0a) {
- return F_true;
- }
- else if (ascii >= 0x0c && ascii <= 0x1f) {
- return F_true;
- }
- else if (ascii == 0x7f) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_character_is_zero_width_
-
-#ifndef _di_f_utf_character_to_char_
- f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (!utf_character) return F_status_set_error(F_parameter);
- if (!character) return F_status_set_error(F_parameter);
- if (!width_max) return F_status_set_error(F_parameter);
- if (!*width_max) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_character_t_width_is(utf_character)) {
- if (macro_f_utf_character_t_width_is(utf_character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- #if __BYTE_ORDER == __LITTLE_ENDIAN
- uint32_t utf = 0;
-
- switch (macro_f_utf_character_t_width_is(utf_character)) {
- case 1:
- utf = macro_f_utf_character_t_to_char_1(utf_character) << 24;
- break;
- case 2:
- utf = (macro_f_utf_character_t_to_char_2(utf_character) << 24) | (macro_f_utf_character_t_to_char_1(utf_character) << 16);
- break;
- case 3:
- utf = (macro_f_utf_character_t_to_char_3(utf_character) << 24) | (macro_f_utf_character_t_to_char_2(utf_character) << 16) | (macro_f_utf_character_t_to_char_1(utf_character) << 8);
- break;
- case 4:
- utf = (macro_f_utf_character_t_to_char_4(utf_character) << 24) | (macro_f_utf_character_t_to_char_3(utf_character) << 16) | (macro_f_utf_character_t_to_char_2(utf_character) << 8) | macro_f_utf_character_t_to_char_1(utf_character);
- break;
- default:
- return F_status_set_error(F_failure);
- }
-
- memcpy(*character, &utf, sizeof(f_char_t) * macro_f_utf_character_t_width_is(utf_character));
- #else
- memcpy(*character, &utf_character, sizeof(f_char_t) * macro_f_utf_character_t_width_is(utf_character));
- #endif // __BYTE_ORDER == __LITTLE_ENDIAN
-
- return F_none;
- }
-
- #if __BYTE_ORDER == __LITTLE_ENDIAN
- uint32_t utf = macro_f_utf_character_t_to_char_1(utf_character) << 24;
-
- memcpy(*character, &utf, sizeof(f_char_t));
- #else
- memcpy(*character, &utf_character, sizeof(f_char_t));
- #endif // __BYTE_ORDER == __LITTLE_ENDIAN
-
- return F_none;
- }
-#endif // _di_f_utf_character_to_char_
-
-#ifndef _di_f_utf_character_unicode_to_
- f_status_t f_utf_character_unicode_to(const f_utf_character_t character, uint32_t *unicode) {
- #ifndef _di_level_0_parameter_checking_
- if (!unicode) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- return private_f_utf_character_unicode_to(character, unicode);
- }
-#endif // _di_f_utf_character_unicode_to_
-
-#ifndef _di_f_utf_character_unicode_from_
- f_status_t f_utf_character_unicode_from(const uint32_t unicode, f_utf_character_t *character) {
- #ifndef _di_level_0_parameter_checking_
- if (!character) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (unicode > 0x10ffff) {
- return F_status_set_error(F_utf);
- }
-
- // U+0000 -> U+007F.
- if (unicode < 0x80) {
- *character = unicode;
- }
-
- // U+0080 -> U+07FF.
- else if (unicode < 0x800) {
- *character = (unicode & 0x7c0) << 2;
- *character |= unicode & 0x3f;
- *character |= 0xc080;
- }
-
- // U+0800 -> U+FFFF.
- else if (unicode < 0x10000) {
- *character = (unicode & 0xf000) << 4;
- *character |= (unicode & 0xfc0) << 2;
- *character |= unicode & 0x3f;
- *character |= 0xe08080;
- }
-
- // U+100000 -> U+10FFFF.
- else {
- *character = (unicode & 0x1c0000) << 6;
- *character |= (unicode & 0x3f000) << 4;
- *character |= (unicode & 0xfc0) << 2;
- *character |= unicode & 0x3f;
- *character |= 0xe0808080;
- }
-
- return F_none;
- }
-#endif // _di_f_utf_character_unicode_from_
-
-#ifndef _di_f_utf_character_unicode_string_to_
- f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, uint32_t *unicode) {
- #ifndef _di_level_0_parameter_checking_
- if (!string) return F_status_set_error(F_parameter);
- if (!unicode) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- f_array_length_t i = 0;
-
- while (i < length && !string[i]) {
- ++i;
- } // while
-
- if (i < length) {
- if (macro_f_utf_character_t_width_is(string[i])) {
- i = length;
- }
- else {
- if (macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_u_s.string[0] || macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_U_s.string[0]) {
- do {
- ++i;
- } while (i < length && !string[i]);
-
- if (i < length && !macro_f_utf_character_t_width_is(string[i]) && macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_plus_s.string[0]) {
- ++i;
- }
- else {
- i = length;
- }
- }
- else {
- i = length;
- }
- }
- }
-
- if (i == length) {
- return F_status_set_error(F_valid_not);
- }
-
- uint32_t value = 0;
- uint8_t character = 0;
-
- for (; i < length; ++i) {
-
- if (!string[i]) continue;
-
- // Only ASCII character numbers are allowed to represent
- if (macro_f_utf_character_t_width_is(string[i])) {
- return F_status_set_error(F_valid_not);
- }
-
- value *= 16;
- character = macro_f_utf_character_t_to_char_1(string[i]);
-
- if (character > 0x2f && character < 0x3a) {
- value += character - 0x30;
- }
- else if (character > 0x40 && character < 0x47) {
- value += (character - 0x41) + 10;
- }
- else if (character > 0x60 && character < 0x67) {
- value += (character - 0x61) + 10;
- }
- else {
- return F_status_set_error(F_valid_not);
- }
- } // for
-
- *unicode = value;
-
- return F_none;
- }
-#endif // _di_f_utf_character_unicode_string_to_
-
-#ifndef _di_f_utf_is_
- f_status_t f_utf_is(const f_string_t character) {
-
- return macro_f_utf_byte_width_is(*character);
- }
-#endif // _di_f_utf_is_
-
-#ifndef _di_f_utf_is_alpha_
- f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- f_utf_character_t character_utf = 0;
-
- {
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
- if (F_status_is_error(status)) return status;
- }
-
- return private_f_utf_character_is_alpha(character_utf);
- }
-
- if (isalpha(*character)) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_is_alpha_
-
-#ifndef _di_f_utf_is_alpha_digit_
- f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- f_utf_character_t character_utf = 0;
-
- {
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
- if (F_status_is_error(status)) return status;
- }
-
- return private_f_utf_character_is_alpha_digit(character_utf);
- }
-
- if (isalnum(*character)) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_is_alpha_digit_
-
-#ifndef _di_f_utf_is_alpha_numeric_
- f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- f_utf_character_t character_utf = 0;
-
- {
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
- if (F_status_is_error(status)) return status;
- }
-
- return private_f_utf_character_is_alpha_numeric(character_utf);
- }
-
- if (isalnum(*character)) {
- return F_true;
- }
-
- return F_false;
- }
-#endif // _di_f_utf_is_alpha_numeric_
-
-#ifndef _di_f_utf_is_ascii_
- f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- return F_false;
- }
-
- return F_true;
- }
-#endif // _di_f_utf_is_ascii_
-
-#ifndef _di_f_utf_is_combining_
- f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
- #ifndef _di_level_0_parameter_checking_
- if (width_max < 1) return F_status_set_error(F_parameter);
- #endif // _di_level_0_parameter_checking_
-
- if (macro_f_utf_byte_width_is(*character)) {
- if (macro_f_utf_byte_width_is(*character) > width_max) {
- return F_status_set_error(F_failure);
- }
-
- if (macro_f_utf_byte_width_is(*character) == 1) {
- return F_status_set_error(F_utf_fragment);
- }
-
- f_utf_character_t character_utf = 0;
-
- {
- const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
- if (F_status_is_error(status)) return status;
- }
-
- return private_f_utf_character_is_combining(character_utf);
- }
-
- // There are no ASCII combining characters.
- return F_false;
- }
-#endif // _di_f_utf_is_combining_
-
-#ifndef _di_f_utf_is_control_
-  /**
-   * Tell whether the character at the start of the string is a control character.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_control().
-   * ASCII input is classified with iscntrl().
-   *
-   * Returns F_true or F_false on success.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control(character_utf);
-    }
-
-    // iscntrl() only promises "nonzero" for a match, so map it onto the status codes
-    // explicitly rather than returning its raw value as an f_status_t.
-    if (iscntrl(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_control_
-
-#ifndef _di_f_utf_is_control_code_
-  /**
-   * Tell whether the character at the start of the string is a control code.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_control_code().
-   * ASCII input is classified with iscntrl().
-   *
-   * Returns F_true or F_false on success.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   *
-   * The disable guard carries the trailing underscore so that it matches its closing
-   * comment and the naming used by every other guard in this file.
-   */
-  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control_code(character_utf);
-    }
-
-    if (iscntrl(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_control_code_
-
-#ifndef _di_f_utf_is_control_format_
-  /**
-   * Tell whether the character at the start of the string is a control format character.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_control_format().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII control formats.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_control_format(utf);
-  }
-#endif // _di_f_utf_is_control_format_
-
-#ifndef _di_f_utf_is_control_picture_
-  /**
-   * Tell whether the character at the start of the string is a control picture.
-   *
-   * Only sequences with a byte width of exactly 3 are converted and handed to
-   * private_f_utf_character_is_control_picture(); ASCII and other widths yield F_false.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII control pictures.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    // Anything that is not three bytes wide is rejected before conversion.
-    if (macro_f_utf_byte_width_is(*character) != 3) return F_false;
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_control_picture(utf);
-  }
-#endif // _di_f_utf_is_control_picture_
-
-#ifndef _di_f_utf_is_digit_
-  /**
-   * Tell whether the character at the start of the string is a digit.
-   *
-   * ASCII input is classified with isdigit(); multi-byte input is converted and
-   * classified by private_f_utf_character_is_digit().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return isdigit(*character) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_digit(utf);
-  }
-#endif // _di_f_utf_is_digit_
-
-#ifndef _di_f_utf_is_emoji_
-  /**
-   * Tell whether the character at the start of the string is an emoji.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_emoji().
-   * ASCII input yields F_true only for what isdigit() accepts.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (!macro_f_utf_byte_width_is(*character)) {
-
-      // NOTE(review): the ASCII fallback accepts decimal digits only; presumably this is
-      // because digits are keycap emoji bases, but it may be a copy of the digit test. Confirm.
-      return isdigit(*character) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_emoji(utf);
-  }
-#endif // _di_f_utf_is_emoji_
-
-#ifndef _di_f_utf_is_fragment_
-  /**
-   * Tell whether the first byte of character is a UTF-8 fragment.
-   *
-   * Returns F_true when the byte has a UTF-8 byte width of exactly 1, F_false otherwise.
-   */
-  f_status_t f_utf_is_fragment(const f_string_t character) {
-
-    return (macro_f_utf_byte_width_is(*character) == 1) ? F_true : F_false;
-  }
-#endif // _di_f_utf_is_fragment_
-
-#ifndef _di_f_utf_is_graph_
-  /**
-   * Tell whether the character at the start of the string is a graph character.
-   *
-   * ASCII input is classified with isgraph().
-   * Multi-byte input is a graph character unless it is a control, whitespace, or zero-width character.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return isgraph(*character) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    // The checks run in this order and stop at the first match; zero-width characters are treated as non-graph.
-    if (private_f_utf_character_is_control(utf) || private_f_utf_character_is_whitespace(utf) || private_f_utf_character_is_zero_width(utf)) {
-      return F_false;
-    }
-
-    return F_true;
-  }
-#endif // _di_f_utf_is_graph_
-
-#ifndef _di_f_utf_is_numeric_
-  /**
-   * Tell whether the character at the start of the string is numeric.
-   *
-   * ASCII input is classified with isdigit(); multi-byte input is converted and
-   * classified by private_f_utf_character_is_numeric().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return isdigit(*character) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_numeric(utf);
-  }
-#endif // _di_f_utf_is_numeric_
-
-#ifndef _di_f_utf_is_phonetic_
-  /**
-   * Tell whether the character at the start of the string is a phonetic character.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_phonetic().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII phonetic characters.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_phonetic(utf);
-  }
-#endif // _di_f_utf_is_phonetic_
-
-#ifndef _di_f_utf_is_private_
-  /**
-   * Tell whether the character at the start of the string is a private character.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_private().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII private characters.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_private(utf);
-  }
-#endif // _di_f_utf_is_private_
-
-#ifndef _di_f_utf_is_punctuation_
-  /**
-   * Tell whether the character at the start of the string is a punctuation character.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_punctuation().
-   * ASCII input is matched against the explicit ranges below.
-   *
-   * Returns F_true or F_false on success.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_punctuation(character_utf);
-    }
-
-    // ASCII: '!' to '#'.
-    if (character[0] > 0x20 && character[0] < 0x24) {
-      return F_true;
-    }
-
-    // ASCII: '%' to '*'.
-    if (character[0] > 0x24 && character[0] < 0x2b) {
-      return F_true;
-    }
-
-    // ASCII: ',' to '/'.
-    if (character[0] > 0x2b && character[0] < 0x30) {
-      return F_true;
-    }
-
-    // ASCII: ':', ';', '?', or '@'.
-    if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
-      return F_true;
-    }
-
-    // ASCII: '[' to ']' (0x5b, 0x5c, and 0x5d); the upper bound is 0x5e so that ']' is included.
-    if (character[0] > 0x5a && character[0] < 0x5e) {
-      return F_true;
-    }
-
-    // ASCII: '_', '{', or '}'.
-    if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_punctuation_
-
-#ifndef _di_f_utf_is_symbol_
-  /**
-   * Tell whether the character at the start of the string is a symbol character.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_symbol().
-   * ASCII input is matched against the explicit values below.
-   *
-   * Returns F_true or F_false on success.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_symbol(character_utf);
-    }
-
-    // ASCII: '$' or '+'.
-    if (character[0] == 0x24 || character[0] == 0x2b) {
-      return F_true;
-    }
-
-    // ASCII: '<' to '>' (0x3c, 0x3d, and 0x3e); the bounds are exclusive, so use 0x3b and 0x3f.
-    if (character[0] > 0x3b && character[0] < 0x3f) {
-      return F_true;
-    }
-
-    // ASCII: '^', '`', '|', or '~'.
-    if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_symbol_
-
-#ifndef _di_f_utf_is_surrogate_
-  /**
-   * Tell whether the character at the start of the string is a surrogate.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_surrogate().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII are never surrogate.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_surrogate(utf);
-  }
-#endif // _di_f_utf_is_surrogate_
-
-#ifndef _di_f_utf_is_unassigned_
-  /**
-   * Tell whether the character at the start of the string is unassigned.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_unassigned().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII are never unassigned.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_unassigned(utf);
-  }
-#endif // _di_f_utf_is_unassigned_
-
-#ifndef _di_f_utf_is_valid_
-  /**
-   * Tell whether the character at the start of the string is valid.
-   *
-   * ASCII input always yields F_true; multi-byte input is converted and then
-   * checked by private_f_utf_character_is_valid().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII are valid.
-    if (!macro_f_utf_byte_width_is(*character)) return F_true;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_valid(utf);
-  }
-#endif // _di_f_utf_is_valid_
-
-#ifndef _di_f_utf_is_whitespace_
-  /**
-   * Tell whether the character at the start of the string is whitespace.
-   *
-   * ASCII input is classified with isspace(); multi-byte input is converted and
-   * classified by private_f_utf_character_is_whitespace().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return isspace(*character) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_whitespace(utf);
-  }
-#endif // _di_f_utf_is_whitespace_
-
-#ifndef _di_f_utf_is_whitespace_modifier_
-  /**
-   * Tell whether the character at the start of the string is a whitespace modifier.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_whitespace_modifier().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII whitespace modifiers.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_whitespace_modifier(utf);
-  }
-#endif // _di_f_utf_is_whitespace_modifier_
-
-#ifndef _di_f_utf_is_whitespace_other_
-  /**
-   * Tell whether the character at the start of the string is an "other" whitespace character.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_whitespace_other().
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // There are no ASCII whitespace other.
-    if (!macro_f_utf_byte_width_is(*character)) return F_false;
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_whitespace_other(utf);
-  }
-#endif // _di_f_utf_is_whitespace_other_
-
-#ifndef _di_f_utf_is_wide_
-  /**
-   * Tell whether the character at the start of the string is a wide character.
-   *
-   * ASCII input always yields F_false; multi-byte input is converted and then
-   * classified by private_f_utf_character_is_wide().
-   *
-   * Returns F_true or F_false on success.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   *
-   * The width_max parameter check matches the one performed by the sibling f_utf_is_*() functions.
-   */
-  f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_wide(character_utf);
-    }
-
-    // There are no wide ASCII characters.
-    return F_false;
-  }
-#endif // _di_f_utf_is_wide_
-
-#ifndef _di_f_utf_is_word_
-  /**
-   * Tell whether the character at the start of the string is a word character.
-   *
-   * ASCII input is a word character when isalnum() accepts it or it is the underscore.
-   * Multi-byte input is converted and classified by private_f_utf_character_is_word(),
-   * which receives the strict flag unchanged.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0]) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_word(utf, strict);
-  }
-#endif // _di_f_utf_is_word_
-
-#ifndef _di_f_utf_is_word_dash_
-  /**
-   * Tell whether the character at the start of the string is a word or dash character.
-   *
-   * ASCII input matches when isalnum() accepts it or it is the underscore or the minus.
-   * Multi-byte input is converted and classified by private_f_utf_character_is_word_dash(),
-   * which receives the strict flag unchanged.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0]) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_word_dash(utf, strict);
-  }
-#endif // _di_f_utf_is_word_dash_
-
-#ifndef _di_f_utf_is_word_dash_plus_
-  /**
-   * Tell whether the character at the start of the string is a word, dash, or plus character.
-   *
-   * ASCII input matches when isalnum() accepts it or it is the underscore, the minus, or the plus.
-   * Multi-byte input is converted and classified by private_f_utf_character_is_word_dash_plus(),
-   * which receives the strict flag unchanged.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // ASCII path.
-    if (!macro_f_utf_byte_width_is(*character)) {
-      return (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0] || *character == f_string_ascii_plus_s.string[0]) ? F_true : F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_word_dash_plus(utf, strict);
-  }
-#endif // _di_f_utf_is_word_dash_plus_
-
-#ifndef _di_f_utf_is_zero_width_
-  /**
-   * Tell whether the character at the start of the string is a zero-width character.
-   *
-   * Multi-byte input is converted and classified by private_f_utf_character_is_zero_width().
-   * ASCII input matches for 0x00 to 0x08, 0x0c to 0x1f, and 0x7f.
-   * Errors (with error bit): F_parameter, F_failure, F_utf_fragment, or any conversion error.
-   */
-  f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (!macro_f_utf_byte_width_is(*character)) {
-
-      // These control characters are considered zero-width spaces.
-      if ((*character >= 0x00 && *character <= 0x08) || (*character >= 0x0c && *character <= 0x1f) || *character == 0x7f) {
-        return F_true;
-      }
-
-      return F_false;
-    }
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
-    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_is_zero_width(utf);
-  }
-#endif // _di_f_utf_is_zero_width_
-
-#ifndef _di_f_utf_unicode_from_
-  /**
-   * Encode a Unicode codepoint as UTF-8 bytes into the string pointed to by character.
-   *
-   * The encoded bytes are written starting at (*character)[0]. Any bytes left over
-   * below width_max, up to the fourth byte, are set to 0.
-   *
-   * Returns F_none on success.
-   * Returns F_parameter (with error bit) if width_max is 0 or character is NULL, when parameter checking is enabled.
-   * Returns F_utf (with error bit) if unicode is above U+10FFFF or does not fit in width_max bytes.
-   *
-   * The parameter check tests the output pointer rather than the codepoint value, so that
-   * U+0000 (covered by the first range below) can be encoded.
-   */
-  f_status_t f_utf_unicode_from(const uint32_t unicode, const f_array_length_t width_max, f_string_t *character) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-      if (!character) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // @fixme the code here needs to be reviewed for endianess accuracy for both big and little endian.
-    if (unicode > 0x10ffff) {
-      return F_status_set_error(F_utf);
-    }
-
-    // Number of encoded bytes written; everything after it (up to 4 bytes) gets cleared.
-    f_array_length_t used = 0;
-
-    if (unicode < 0x80) {
-
-      // U+0000 -> U+007F
-      (*character)[0] = (uint8_t) unicode;
-      used = 1;
-    }
-    else if (unicode < 0x800) {
-      if (width_max < 2) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+0080 -> U+07FF
-      (*character)[0] = F_utf_byte_2_d | ((uint8_t) ((unicode & 0x7c0) >> 6));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-      used = 2;
-    }
-    else if (unicode < 0x10000) {
-      if (width_max < 3) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+0800 -> U+FFFF
-      (*character)[0] = F_utf_byte_3_d | ((uint8_t) ((unicode & 0xf000) >> 12));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
-      (*character)[2] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-      used = 3;
-    }
-    else {
-      if (width_max < 4) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+10000 -> U+10FFFF
-      (*character)[0] = F_utf_byte_4_d | ((uint8_t) ((unicode & 0x1c0000) >> 18));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0x3f000) >> 12));
-      (*character)[2] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
-      (*character)[3] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-      used = 4;
-    }
-
-    // Clear the unused trailing bytes of the string itself; always index through *character.
-    for (; used < 4 && used < width_max; ++used) {
-      (*character)[used] = 0;
-    } // for
-
-    return F_none;
-  }
-#endif // _di_f_utf_unicode_from_
-
-#ifndef _di_f_utf_unicode_to_
-  /**
-   * Convert the UTF-8 character at the start of the string into a Unicode codepoint.
-   *
-   * The string is first converted with private_f_utf_char_to_character(); the result is
-   * then passed to private_f_utf_character_unicode_to(), whose status is returned.
-   * Returns F_parameter (with error bit) if width_max is 0 or unicode is NULL, when parameter checking is enabled.
-   */
-  f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    f_utf_character_t utf = 0;
-    const f_status_t status = private_f_utf_char_to_character(character, width_max, &utf);
-
-    if (F_status_is_error(status)) return status;
-
-    return private_f_utf_character_unicode_to(utf, unicode);
-  }
-#endif // _di_f_utf_unicode_to_
-
-#ifndef _di_f_utf_unicode_string_to_
-  /**
-   * Parse a textual codepoint of the form "U+XXXX" (or "u+XXXX") into its numeric value.
-   *
-   * NUL bytes anywhere in the string are skipped. The hexadecimal digits may be upper or lower case.
-   *
-   * Returns F_none on success, with the value stored in unicode.
-   * Returns F_parameter (with error bit) if unicode is NULL, when parameter checking is enabled.
-   * Returns F_valid_not (with error bit) if the prefix is missing, nothing follows the prefix,
-   * a non-hexadecimal character is found, or the value exceeds U+10FFFF.
-   *
-   * The range is checked after every digit so that the 32-bit accumulator cannot wrap
-   * around on overly long input and slip past the range check.
-   */
-  f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    f_array_length_t i = 0;
-
-    while (i < length && !string[i]) {
-      ++i;
-    } // while
-
-    if (i < length) {
-      if (string[i] == f_string_ascii_u_s.string[0] || string[i] == f_string_ascii_U_s.string[0]) {
-        do {
-          ++i;
-        } while (i < length && !string[i]);
-
-        if (i < length && string[i] == f_string_ascii_plus_s.string[0]) {
-          ++i;
-        }
-        else {
-          i = length;
-        }
-      }
-      else {
-        i = length;
-      }
-    }
-
-    if (i == length) {
-      return F_status_set_error(F_valid_not);
-    }
-
-    uint32_t value = 0;
-
-    for (; i < length; ++i) {
-
-      if (!string[i]) continue;
-
-      // value never exceeds 0x10ffff here, so multiplying by 16 cannot overflow 32 bits.
-      value *= 16;
-
-      if (string[i] > 0x2f && string[i] < 0x3a) {
-        value += string[i] - 0x30;
-      }
-      else if (string[i] > 0x40 && string[i] < 0x47) {
-        value += (string[i] - 0x41) + 10;
-      }
-      else if (string[i] > 0x60 && string[i] < 0x67) {
-        value += (string[i] - 0x61) + 10;
-      }
-      else {
-        return F_status_set_error(F_valid_not);
-      }
-
-      if (value > 0x10ffff) {
-        return F_status_set_error(F_valid_not);
-      }
-    } // for
-
-    *unicode = value;
-
-    return F_none;
-  }
-#endif // _di_f_utf_unicode_string_to_
-
#ifdef __cplusplus
} // extern "C"
#endif
// FLL-0 utf includes.
#include <fll/level_0/utf/common.h>
+#include <fll/level_0/utf/convert.h>
#include <fll/level_0/utf/dynamic.h>
+#include <fll/level_0/utf/is.h>
+#include <fll/level_0/utf/is_character.h>
#include <fll/level_0/utf/map.h>
#include <fll/level_0/utf/string.h>
#include <fll/level_0/utf/triple.h>
extern f_status_t f_utf_buffer_increment(const f_string_static_t buffer, f_string_range_t *range, const f_array_length_t step);
#endif // _di_f_utf_buffer_increment_
-/**
- * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
- *
- * This does not validate if the UTF-8 character is a valid UTF-8 character, for that use f_utf_character_is_valid().
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 character.
- * F_false if not a UTF-8 character.
- * F_utf_fragment if this is a UTF-8 character fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_is_
- extern f_status_t f_utf_character_is(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 alphabet character.
- * F_false if not a UTF-8 alphabet character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalpha()
- */
-#ifndef _di_f_utf_character_is_alpha_
- extern f_status_t f_utf_character_is_alpha(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 alpha-digit character.
- * F_false if not a UTF-8 alpha-digit character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_alpha_digit_
- extern f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 alpha-numeric character.
- * F_false if not a UTF-8 alpha-numeric character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_alpha_numeric_
- extern f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII character.
- *
- * This does not validate whether the UTF-8 character is valid or not.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if an ASCII character.
- * F_false if not an ASCII character.
- */
-#ifndef _di_f_utf_character_is_ascii_
- extern f_status_t f_utf_character_is_ascii(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_ascii_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 combining character.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 combining character.
- * F_false if not a UTF-8 combining character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_combining_
- extern f_status_t f_utf_character_is_combining(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_combining_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
- *
- * This includes control code and control format characters.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 control character.
- * F_false if not a UTF-8 control character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_character_is_control_
- extern f_status_t f_utf_character_is_control(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
- *
- * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 control code character.
- * F_false if not a UTF-8 control code character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_character_is_control_code_
- extern f_status_t f_utf_character_is_control_code(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_code_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control format character.
- *
- * Control Format characters are special characters used for formatting.
- * These are considered control characters.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 control format character.
- * F_false if not a UTF-8 control format character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_control_format_
- extern f_status_t f_utf_character_is_control_format(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_format_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control picture character.
- *
- * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 control picture character.
- * F_false if not a UTF-8 control picture character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_control_picture_
- extern f_status_t f_utf_character_is_control_picture(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_picture_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 digit character.
- * F_false if not a UTF-8 digit character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_character_is_digit_
- extern f_status_t f_utf_character_is_digit(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 emoji character.
- * F_false if not a UTF-8 emoji character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_emoji_
- extern f_status_t f_utf_character_is_emoji(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_emoji_
-
-/**
- * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
- *
- * Characters whose width is 1-byte are invalid.
- * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
- *
- * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
- *
- * According to rfc3629, the valid octet sequences for UTF-8 are:
- * UTF8-octets = *( UTF8-char )
- * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
- * UTF8-1 = %x00-7F
- * UTF8-2 = %xC2-DF UTF8-tail
- * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
- * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
- * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
- * %xF4 %x80-8F 2( UTF8-tail )
- * UTF8-tail = %x80-BF
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 character.
- * F_false if not a UTF-8 character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_is_fragment_
- extern f_status_t f_utf_character_is_fragment(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_fragment_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 graph.
- * F_false if not a UTF-8 graph.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isgraph()
- */
-#ifndef _di_f_utf_character_is_graph_
- extern f_status_t f_utf_character_is_graph(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_graph_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 numeric character.
- * F_false if not a UTF-8 numeric character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_character_is_numeric_
- extern f_status_t f_utf_character_is_numeric(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 phonetic character.
- * F_false if not a UTF-8 phonetic character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_phonetic_
- extern f_status_t f_utf_character_is_phonetic(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_phonetic_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 private character.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 private character.
- * F_false if not a UTF-8 private character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_private_
- extern f_status_t f_utf_character_is_private(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_private_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 punctuation character.
- * F_false if not a UTF-8 punctuation character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_punctuation_
- extern f_status_t f_utf_character_is_punctuation(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_punctuation_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 symbol character.
- * F_false if not a UTF-8 symbol character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_symbol_
- extern f_status_t f_utf_character_is_symbol(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_symbol_
-
-/**
- * Check to see if the entire byte block of the character is an unassigned (well-formed) UTF-8 character.
- *
- * The Surrogates and Private Use are not considered unassigned.
- *
- * This does validate if the UTF-8 character is an unassigned UTF-8 character.
- * To not do this, use f_utf_character_is().
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 unassigned character.
- * F_false if not a UTF-8 unassigned character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_fragment()
- */
-#ifndef _di_f_utf_character_is_unassigned_
- extern f_status_t f_utf_character_is_unassigned(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_unassigned_
-
-/**
- * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
- *
- * This does validate if the UTF-8 character is a valid UTF-8 character.
- * To not do this, use f_utf_character_is().
- *
- * ASCII character codes are considered valid by this function.
- *
- * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 character.
- * F_false if not a UTF-8 character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_fragment()
- */
-#ifndef _di_f_utf_character_is_valid_
- extern f_status_t f_utf_character_is_valid(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_valid_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
- *
- * Non-printing or zero-width characters are not considered whitespace.
- * This does include line separators like '\n'.
- * This does not include phonetic spaces, like whitespace modifiers.
- * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * However, because they are not rendered as whitespace, they are technically not white space.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 whitespace.
- * F_false if not a UTF-8 whitespace.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_character_is_whitespace_
- extern f_status_t f_utf_character_is_whitespace(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 whitespace modifier character.
- *
- * These are phonetic spaces.
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 modifier character.
- * F_false if not a UTF-8 modifier character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_whitespace_modifier_
- extern f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_modifier_
-
-/**
- * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
- *
- * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 (other) whitespace.
- * F_false if not a UTF-8 (other) whitespace.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_character_is_whitespace_other_
- extern f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_other_
-
-/**
- * Get whether or not the UTF-8 character is a wide character on display.
- *
- * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
- * Instead, this is the width in characters on the screen the character takes up.
- * "Wide" characters are those that take up 2 characters on render.
- * "Narrow" characters are those that take up 1 character on render.
- *
- * @param character
- * The (UTF-8) character.
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_wide_
- extern f_status_t f_utf_character_is_wide(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_wide_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
- *
- * A word character is alpha-numeric or an underscore '_'.
- *
- * @param character
- * The character to validate.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word character.
- * F_false if not a UTF-8 word character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_
- extern f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
- *
- * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * @param character
- * The character to validate.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word or dash character.
- * F_false if not a UTF-8 word or dash character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_dash_
- extern f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_dash_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
- *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
- *
- * @param character
- * The character to validate.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word or dash character.
- * F_false if not a UTF-8 word or dash character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_dash_plus_
- extern f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_dash_plus_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
- *
- * Only characters that do not print, which are generally called zero-width.
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 non-printing or zero-width character.
- * F_false if not a UTF-8 non-printing or zero-width character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_zero_width_
- extern f_status_t f_utf_character_is_zero_width(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_zero_width_
-
-/**
- * Convert a specialized f_utf_character_t type to a uint8_t, stored as a string (character buffer).
- *
- * This will also convert ASCII characters stored in the utf_character array.
- * This will not resize character.
- *
- * @param utf_character
- * The UTF-8 character to convert from.
- * @param character
- * A uint8_t representation of the UTF-8 character, stored as a string of width bytes.
- * If width_max is 0, then this should be set to 0.
- * @param width_max
- * This is set to the max number of bytes available.
- * This is then updated to represent the max bytes used if enough space is available.
- *
- * @return
- * F_none if conversion was successful.
- *
- * F_failure (with error bit) if width is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_to_char_
- extern f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max);
-#endif // _di_f_utf_character_to_char_
-
-/**
- * Convert a given (UTF-8) character into Unicode.
- *
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
- * The Unicode does not need to be interpreted like UTF-8, it is simply a sequence of numbers from 0 up to the max supported Unicode integer value (U+10FFFF).
- *
- * @param character
- * The (UTF-8) character.
- * @param unicode
- * The Unicode number.
- *
- * @return
- * F_none on success.
- *
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_unicode_to_
- extern f_status_t f_utf_character_unicode_to(const f_utf_character_t character, uint32_t *unicode);
-#endif // _di_f_utf_character_unicode_to_
-
-/**
- * Convert a given Unicode into (UTF-8) character.
- *
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
- * The Unicode does not need to be interpreted like UTF-8, it is simply a sequence of numbers from 0 up to the max supported Unicode integer value (U+10FFFF).
- *
- * @param unicode
- * The Unicode number.
- * @param character
- * The (UTF-8) character.
- *
- * @return
- * F_none on success.
- *
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_unicode_from_
- extern f_status_t f_utf_character_unicode_from(const uint32_t unicode, f_utf_character_t *character);
-#endif // _di_f_utf_character_unicode_from_
-
-/**
- * Convert a string of the format "U+FFFF" into the codepoint value.
- *
- * This ignores NULL characters.
- * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
- * The "U+" prefix is optional.
- * Only ASCII characters are allowed to represent the Unicode sequence string.
- *
- * @param string
- * The string representing a Unicode sequence.
- * @param length
- * The maximum number of characters.
- * @param unicode
- * A 32-bit integer representing the Unicode (such as U+0001).
- * Does not need to be interpreted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_valid_not (with error bit) if string is not a valid Unicode string.
- */
-#ifndef _di_f_utf_character_unicode_string_to_
- extern f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, uint32_t *unicode);
-#endif // _di_f_utf_character_unicode_string_to_
-
-/**
- * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
- *
- * This does not check the validity of the character, for that instead use f_utf_is_valid().
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- *
- * @return
- * F_true if a UTF-8 character.
- * F_false if not a UTF-8 character.
- */
-#ifndef _di_f_utf_is_
- extern f_status_t f_utf_is(const f_string_t character);
-#endif // _di_f_utf_is_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 alphabet character.
- * F_false if not a UTF-8 alphabet character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalpha()
- */
-#ifndef _di_f_utf_is_alpha_
- extern f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 alpha-digit character.
- * F_false if not a UTF-8 alpha-digit character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_alpha_digit_
- extern f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 alpha-numeric character.
- * F_false if not a UTF-8 alpha-numeric character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_alpha_numeric_
- extern f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if an ASCII character.
- * F_false if not an ASCII character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_ascii_
- extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_ascii_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 combining character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 combining character.
- * F_false if not a UTF-8 combining character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_combining_
- extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_combining_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
- *
- * This includes control code and control format characters.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 control character.
- * F_false if not a UTF-8 control character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_is_control_
- extern f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control code character.
- *
- * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 control code character.
- * F_false if not a UTF-8 control code character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_code_
- extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_code_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control format character.
- *
- * Control Format characters are special characters used for formatting.
- * These are considered control characters.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 control format character.
- * F_false if not a UTF-8 control format character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_format_
- extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_format_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control picture character.
- *
- * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 control picture character.
- * F_false if not a UTF-8 control picture character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_picture_
- extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_picture_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 digit character.
- * F_false if not a UTF-8 digit character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_is_digit_
- extern f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 emoji character.
- * F_false if not a UTF-8 emoji character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_emoji_
- extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_emoji_
-
-/**
- * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
- *
- * Characters whose width is 1-byte are invalid.
- * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
- *
- * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
- *
- * According to rfc3629, the valid octect sequences for UTF-8 are:
- * UTF8-octets = *( UTF8-char )
- * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
- * UTF8-1 = %x00-7F
- * UTF8-2 = %xC2-DF UTF8-tail
- * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
- * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
- * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
- * %xF4 %x80-8F 2( UTF8-tail )
- * UTF8-tail = %x80-BF
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- *
- * @return
- * F_true if a UTF-8 character.
- * F_false if not a UTF-8 character.
- */
-#ifndef _di_f_utf_is_fragment_
- extern f_status_t f_utf_is_fragment(const f_string_t character);
-#endif // _di_f_utf_is_fragment_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 graph.
- * F_false if not a UTF-8 graph.
- *
- * F_maybe (with error bit) if this could be a graph but width is not long enough.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isgraph()
- */
-#ifndef _di_f_utf_is_graph_
- extern f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_graph_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 numeric character.
- * F_false if not a UTF-8 numeric character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_is_numeric_
- extern f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 phonetic character.
- * F_false if not a UTF-8 phonetic character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_phonetic_
- extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_phonetic_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 private character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 punctuation character.
- * F_false if not a UTF-8 punctuation character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_private_
- extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_private_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 punctuation character.
- * F_false if not a UTF-8 punctuation character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_punctuation_
- extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_punctuation_
-
-/**
- * Check to see if the entire byte block of the character is a surrogate UTF-8 character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 symbol character.
- * F_false if not a UTF-8 symbol character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_surrogate_
- extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_surrogate_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 symbol character.
- * F_false if not a UTF-8 symbol character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_symbol_
- extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_symbol_
-
-/**
- * Check to see if the entire byte block of the character is a unassigned UTF-8 character.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if an unassigned UTF-8 character.
- * F_false if not an unassigned UTF-8 character.
- *
- * F_parameter (with error bit) if a parameter is inunassigned.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_unassigned_
- extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_unassigned_
-
-/**
- * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
- *
- * This does validate if the UTF-8 character is a valid UTF-8 character.
- * To not do this, use f_utf_is().
- *
- * Valid ASCII character codes are considered valid by this function.
- *
- * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a valid UTF-8 character or is an ASCII character.
- * F_false if not a valid UTF-8 character.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_valid_
- extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_valid_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
- *
- * Non-printing or zero-width characters are not considered whitespace.
- * This does include line separators like '\n'.
- * This does not include phonetic spaces, like whitespace modifiers.
- * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * However, because they are not renderred as whitespace, they are technically not white space.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 whitespace.
- * F_false if not a UTF-8 whitespace.
- *
- * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_is_whitespace_
- extern f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace modifier character.
- *
- * These are phonetic spaces.
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 whitespace.
- * F_false if not a UTF-8 whitespace.
- *
- * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_whitespace_modifier_
- extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_modifier_
-
-/**
- * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
- *
- * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 whitespace.
- * F_false if not a UTF-8 whitespace.
- *
- * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_whitespace_other_
- extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_other_
-
-/**
- * Get whether or not the UTF-8 character is a wide character on display.
- *
- * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
- * Instead, this is the width in characters on the screen the character takes up.
- * When "wide" characters that take up either 2 characters on render.
- * When "narrow" characters that take up either 1 character on render.
- *
- * @param character
- * The (UTF-8) character.
- * @param width_max
- * The max width available for representing the UTF-8 character.
- * There must be enough space in the character buffer to handle the Unicode width.
- * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- * This is the width in bytes the codepoint takes up in UTF-8.
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_wide_
- extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_wide_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
- *
- * A word character is alpha-digit or an underscore '_'.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word character.
- * F_false if not a UTF-8 word character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_
- extern f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
- *
- * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word or dash character.
- * F_false if not a UTF-8 word or dash character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_dash_
- extern f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_dash_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
- *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- * @param strict
- * When TRUE, include all appropriate characters by type as per Unicode.
- * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- * When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- * F_true if a UTF-8 word or dash character.
- * F_false if not a UTF-8 word or dash character.
- *
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_dash_plus_
- extern f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_dash_plus_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
- *
- * Only characters that do not print, which are generally called zero-width.
- *
- * @param character
- * The character to validate.
- * There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- * The maximum width available for checking.
- * Can be anything greater than 0.
- *
- * @return
- * F_true if a UTF-8 whitespace.
- * F_false if not a UTF-8 whitespace.
- *
- * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_zero_width_
- extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_zero_width_
-
-/**
- * Convert an ASCII or UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character_t type.
- *
- * @param character
- * The character string to be converted to the f_utf_character_t type.
- * There must be enough space allocated to convert against, as limited by width_max.
- * @param width_max
- * The maximum width available for converting.
- * Can be anything greater than 0.
- * @param character_utf
- * The generated character of type f_utf_character_t.
- * This value may be cleared, even on error.
- *
- * @return
- * F_none if conversion was successful.
- *
- * F_failure (with error bit) if width is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_char_to_character_
- extern f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf);
-#endif // _di_f_utf_char_to_character_
-
-/**
- * Convert a given Unicode into a string block representing a single character.
- *
- * @param character
- * The (UTF-8) character.
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * @param width_max
- * The max width available for representing the UTF-8 character.
- * There must be enough space in the character buffer to handle the Unicode width.
- * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- * @param unicode
- * A 32-bit integer representing the Unicode (such as U+0001).
- * Does not need to be interpretted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_unicode_from_
- extern f_status_t f_utf_unicode_from(const uint32_t unicode, const f_array_length_t width_max, f_string_t *character);
-#endif // _di_f_utf_unicode_from_
-
-/**
- * Convert a given string block representing a single character into Unicode.
- *
- * @param character
- * The (UTF-8) character to convert to the Unicode representation.
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * @param width_max
- * The max width available for representing the UTF-8 character.
- * There must be enough space in the character buffer to handle the Unicode width.
- * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- * @param unicode
- * A 32-bit integer representing the Unicode (such as U+0001).
- * Does not need to be interpretted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_utf (with error bit) if unicode is an invalid Unicode character.
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_unicode_to_
- extern f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode);
-#endif // _di_f_utf_unicode_to_
-
-/**
- * Convert a string of the format "U+FFFF" into the codepoint value.
- *
- * This ignores NULL characters.
- * The string may only contain "U+" followed by a hexidecimal digit, upper or lower case.
- * The "U+" prefix is optional.
- * Only ASCII characters are allowed to represent the Unicode sequence string.
- *
- * @param string
- * The string representing a Unicode sequence.
- * @param length
- * The maximum number of characters.
- * @param unicode
- * A 32-bit integer representing the Unicode (such as U+0001).
- * Does not need to be interpretted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- * F_none on success.
- *
- * F_failure (with error bit) if width_max is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
- * F_valid_not (with error bit) if string is not a valid Unicode string.
- */
-#ifndef _di_f_utf_unicode_string_to_
- extern f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode);
-#endif // _di_f_utf_unicode_string_to_
-
#ifdef __cplusplus
} // extern "C"
#endif
* The macro_f_utf_byte_width_is is identical to macro_f_utf_byte_width, except it returns 0 when character is ASCII.
*/
#ifndef _di_f_utf_byte_
- #define F_utf_byte_1_d 0x80 // 1000 0000
- #define F_utf_byte_2_d 0xc0 // 1100 0000
- #define F_utf_byte_3_d 0xe0 // 1110 0000
- #define F_utf_byte_4_d 0xf0 // 1111 0000
+ #define F_utf_byte_1_d 0x80u // 1000 0000
+ #define F_utf_byte_2_d 0xc0u // 1100 0000
+ #define F_utf_byte_3_d 0xe0u // 1110 0000
+ #define F_utf_byte_4_d 0xf0u // 1111 0000
- #define F_utf_byte_off_1_d 0xc0 // 1100 0000
- #define F_utf_byte_off_2_d 0xe0 // 1110 0000
- #define F_utf_byte_off_3_d 0xf0 // 1111 0000
- #define F_utf_byte_off_4_d 0xf8 // 1111 1000
+ #define F_utf_byte_off_1_d 0xc0u // 1100 0000
+ #define F_utf_byte_off_2_d 0xe0u // 1110 0000
+ #define F_utf_byte_off_3_d 0xf0u // 1111 0000
+ #define F_utf_byte_off_4_d 0xf8u // 1111 1000
#define macro_f_utf_byte_is(character) ((character) & F_utf_byte_1_d)
#endif // _di_f_utf_substitute_
/**
+ * Defines the type used to represent a UTF-8 code as a 32-bit unsigned integer.
+ */
+#ifndef _di_f_utf_t_
+ typedef uint32_t f_utf_t; // A UTF-8 code is held in one unsigned 32-bit integer.
+
+ #define f_utf_t_initialize 0 // Default initializer: zero.
+
+ #define macro_f_utf_initialize(code) code // Expands to the argument as-is. NOTE(review): "code" is not parenthesized and the name omits the "_t" that f_utf_t_initialize has; confirm both are intended.
+#endif // _di_f_utf_t_
+
+/**
* Provide a basic UTF-8 character as a single 4-byte variable.
*
* This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
#ifndef _di_f_utf_character_t_
typedef uint32_t f_utf_character_t;
- #define F_utf_character_mask_byte_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
- #define F_utf_character_mask_byte_2_d 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000
- #define F_utf_character_mask_byte_3_d 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000
- #define F_utf_character_mask_byte_4_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111
+ #define F_utf_character_mask_byte_1_d 0xff000000u // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+ #define F_utf_character_mask_byte_2_d 0xffff0000u // 1111 1111, 1111 1111, 0000 0000, 0000 0000
+ #define F_utf_character_mask_byte_3_d 0xffffff00u // 1111 1111, 1111 1111, 1111 1111, 0000 0000
+ #define F_utf_character_mask_byte_4_d 0xffffffffu // 1111 1111, 1111 1111, 1111 1111, 1111 1111
- #define F_utf_character_mask_char_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
- #define F_utf_character_mask_char_2_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000
- #define F_utf_character_mask_char_3_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
- #define F_utf_character_mask_char_4_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
+ #define F_utf_character_mask_char_1_d 0xff000000u // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+ #define F_utf_character_mask_char_2_d 0x00ff0000u // 0000 0000, 1111 1111, 0000 0000, 0000 0000
+ #define F_utf_character_mask_char_3_d 0x0000ff00u // 0000 0000, 0000 0000, 1111 1111, 0000 0000
+ #define F_utf_character_mask_char_4_d 0x000000ffu // 0000 0000, 0000 0000, 0000 0000, 1111 1111
- #define macro_f_utf_character_t_to_char_1(character) (((character) & F_utf_character_mask_char_1_d) >> 24) // grab first byte.
- #define macro_f_utf_character_t_to_char_2(character) (((character) & F_utf_character_mask_char_2_d) >> 16) // grab second byte.
- #define macro_f_utf_character_t_to_char_3(character) (((character) & F_utf_character_mask_char_3_d) >> 8) // grab third byte.
+ #define macro_f_utf_character_t_to_char_1(character) (((character) & F_utf_character_mask_char_1_d) >> 24u) // grab first byte.
+ #define macro_f_utf_character_t_to_char_2(character) (((character) & F_utf_character_mask_char_2_d) >> 16u) // grab second byte.
+ #define macro_f_utf_character_t_to_char_3(character) (((character) & F_utf_character_mask_char_3_d) >> 8u) // grab third byte.
#define macro_f_utf_character_t_to_char_4(character) ((character) & F_utf_character_mask_char_4_d) // grab fourth byte.
- #define macro_f_utf_character_t_from_char_1(character) (((character) << 24) & F_utf_character_mask_char_1_d) // shift to first byte.
- #define macro_f_utf_character_t_from_char_2(character) (((character) << 16) & F_utf_character_mask_char_2_d) // shift to second byte.
- #define macro_f_utf_character_t_from_char_3(character) (((character) << 8) & F_utf_character_mask_char_3_d) // shift to third byte.
+ #define macro_f_utf_character_t_from_char_1(character) (((character) << 24u) & F_utf_character_mask_char_1_d) // shift to first byte.
+ #define macro_f_utf_character_t_from_char_2(character) (((character) << 16u) & F_utf_character_mask_char_2_d) // shift to second byte.
+ #define macro_f_utf_character_t_from_char_3(character) (((character) << 8u) & F_utf_character_mask_char_3_d) // shift to third byte.
#define macro_f_utf_character_t_from_char_4(character) ((character) & F_utf_character_mask_char_4_d) // shift to fourth byte.
#define macro_f_utf_character_t_width(character) (macro_f_utf_byte_width(macro_f_utf_character_t_to_char_1(character)))
#endif // _di_f_utf_character_t_
#ifndef _di_f_utf_character_t_codes_
- #define F_utf_character_t_eol_d 0x0a000000 // 0000 1010, 0000 0000, 0000 0000, 0000 0000
- #define F_utf_character_t_eos_d 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000
- #define F_utf_character_t_placeholder_d 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000
+ #define F_utf_character_t_eol_d 0x0a000000u // 0000 1010, 0000 0000, 0000 0000, 0000 0000
+ #define F_utf_character_t_eos_d 0x00000000u // 0000 0000, 0000 0000, 0000 0000, 0000 0000
+ #define F_utf_character_t_placeholder_d 0x00000000u // 0000 0000, 0000 0000, 0000 0000, 0000 0000
#endif // _di_f_utf_character_t_codes_
/**
--- /dev/null
+#include "../utf.h"
+#include "../private-utf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_char_to_character_
+  f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1 || !character_utf) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // The leading byte declares more bytes than the caller made available.
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+    // A reported width of 1 is treated as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // The actual packing is done by the private implementation.
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, character_utf);
+
+    return status;
+  }
+#endif // _di_f_utf_char_to_character_
+
+#ifndef _di_f_utf_character_to_char_
+  f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      // NOTE(review): utf_character is passed by value, so this rejects the NUL character rather than a NULL pointer; kept to preserve existing behavior.
+      if (!utf_character) return F_status_set_error(F_parameter);
+      if (!character) return F_status_set_error(F_parameter);
+      if (!width_max) return F_status_set_error(F_parameter);
+      if (!*width_max) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    const uint8_t width = macro_f_utf_character_t_width_is(utf_character);
+
+    // A reported width of 1 is treated as an incomplete UTF-8 fragment.
+    if (width == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    if (width > 4) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 0 is handled as a single (ASCII) byte.
+    const f_array_length_t total = width ? width : 1;
+
+    // Never write beyond the space the caller declared to be available.
+    if (total > *width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // The f_utf_character_t keeps its first byte in the most significant position (see macro_f_utf_character_t_to_char_1).
+    // Writing each byte individually yields the same buffer on both little and big endian hosts.
+    (*character)[0] = (f_char_t) macro_f_utf_character_t_to_char_1(utf_character);
+
+    if (total > 1) {
+      (*character)[1] = (f_char_t) macro_f_utf_character_t_to_char_2(utf_character);
+
+      if (total > 2) {
+        (*character)[2] = (f_char_t) macro_f_utf_character_t_to_char_3(utf_character);
+
+        if (total > 3) {
+          (*character)[3] = (f_char_t) macro_f_utf_character_t_to_char_4(utf_character);
+        }
+      }
+    }
+
+    // NOTE(review): width_max is left untouched here, as in the previous implementation.
+    return F_none;
+  }
+#endif // _di_f_utf_character_to_char_
+
+#ifndef _di_f_utf_character_unicode_to_
+  f_status_t f_utf_character_unicode_to(const f_utf_character_t character, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!unicode) {
+        return F_status_set_error(F_parameter);
+      }
+    #endif // _di_level_0_parameter_checking_
+
+    // The decoding lives in the private implementation, which f_utf_unicode_to() also uses.
+    const f_status_t status = private_f_utf_character_unicode_to(character, unicode);
+
+    return status;
+  }
+#endif // _di_f_utf_character_unicode_to_
+
+#ifndef _di_f_utf_character_unicode_from_
+  f_status_t f_utf_character_unicode_from(const f_utf_t unicode, f_utf_character_t *character) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!character) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Nothing above U+10FFFF is a Unicode codepoint.
+    if (unicode > 0x10ffff) {
+      return F_status_set_error(F_utf);
+    }
+
+    // NOTE(review): sequences shorter than four bytes are built in the low-order bytes here, while macro_f_utf_character_t_to_char_1() reads the first byte from the highest-order byte; confirm which layout callers expect.
+
+    // U+0000 -> U+007F.
+    if (unicode < 0x80) {
+      *character = unicode;
+    }
+
+    // U+0080 -> U+07FF.
+    else if (unicode < 0x800) {
+      *character = (unicode & 0x7c0) << 2;
+      *character |= unicode & 0x3f;
+      *character |= 0xc080;
+    }
+
+    // U+0800 -> U+FFFF.
+    else if (unicode < 0x10000) {
+      *character = (unicode & 0xf000) << 4;
+      *character |= (unicode & 0xfc0) << 2;
+      *character |= unicode & 0x3f;
+      *character |= 0xe08080;
+    }
+
+    // U+10000 -> U+10FFFF.
+    else {
+      *character = (unicode & 0x1c0000) << 6;
+      *character |= (unicode & 0x3f000) << 4;
+      *character |= (unicode & 0xfc0) << 2;
+      *character |= unicode & 0x3f;
+
+      // A four-byte sequence leads with 1111 0xxx (0xf0); 0xe0 would mark a three-byte sequence.
+      *character |= 0xf0808080;
+    }
+
+    return F_none;
+  }
+#endif // _di_f_utf_character_unicode_from_
+
+#ifndef _di_f_utf_character_unicode_string_to_
+  f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!string) return F_status_set_error(F_parameter);
+      if (!unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_array_length_t i = 0;
+
+    // Skip any leading NULL placeholders.
+    while (i < length && !string[i]) {
+      ++i;
+    } // while
+
+    // The prefix must start with the ASCII character of f_string_ascii_u_s or f_string_ascii_U_s.
+    if (i == length || macro_f_utf_character_t_width_is(string[i])) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    if (macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_u_s.string[0] && macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_U_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    do {
+      ++i;
+    } while (i < length && !string[i]);
+
+    // The next non-NULL character must be the ASCII character of f_string_ascii_plus_s.
+    if (i == length || macro_f_utf_character_t_width_is(string[i]) || macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_plus_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    // Something must follow the prefix.
+    if (++i == length) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    f_utf_t value = 0;
+    uint8_t character = 0;
+
+    for (; i < length; ++i) {
+
+      if (!string[i]) continue;
+
+      // Only ASCII characters are allowed to represent the hexadecimal digits.
+      if (macro_f_utf_character_t_width_is(string[i])) {
+        return F_status_set_error(F_valid_not);
+      }
+
+      // The value never exceeds 0x10ffff at this point, so the multiplication cannot wrap.
+      value *= 16;
+      character = macro_f_utf_character_t_to_char_1(string[i]);
+
+      if (character > 0x2f && character < 0x3a) {
+        value += character - 0x30;
+      }
+      else if (character > 0x40 && character < 0x47) {
+        value += (character - 0x41) + 10;
+      }
+      else if (character > 0x60 && character < 0x67) {
+        value += (character - 0x61) + 10;
+      }
+      else {
+        return F_status_set_error(F_valid_not);
+      }
+
+      // Reject anything beyond the largest codepoint, matching f_utf_unicode_string_to().
+      if (value > 0x10ffff) {
+        return F_status_set_error(F_valid_not);
+      }
+    } // for
+
+    *unicode = value;
+
+    return F_none;
+  }
+#endif // _di_f_utf_character_unicode_string_to_
+
+#ifndef _di_f_utf_unicode_from_
+  f_status_t f_utf_unicode_from(const f_utf_t unicode, const f_array_length_t width_max, f_string_t *character) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+
+      // The pointer is what must be validated; a unicode of 0 (U+0000) is a valid codepoint.
+      if (!character) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // @fixme the code here needs to be reviewed for endianess accuracy for both big and little endian.
+    if (unicode > 0x10ffff) {
+      return F_status_set_error(F_utf);
+    }
+
+    // The number of bytes written for the encoded character.
+    f_array_length_t used = 0;
+
+    if (unicode < 0x80) {
+
+      // U+0000 -> U+007F
+      (*character)[0] = (uint8_t) unicode;
+      used = 1;
+    }
+    else if (unicode < 0x800) {
+      if (width_max < 2) {
+        return F_status_set_error(F_utf);
+      }
+
+      // U+0080 -> U+07FF
+      (*character)[0] = F_utf_byte_2_d | ((uint8_t) ((unicode & 0x7c0) >> 6));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 2;
+    }
+    else if (unicode < 0x10000) {
+      if (width_max < 3) {
+        return F_status_set_error(F_utf);
+      }
+
+      // U+0800 -> U+FFFF
+      (*character)[0] = F_utf_byte_3_d | ((uint8_t) ((unicode & 0xf000) >> 12));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
+      (*character)[2] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 3;
+    }
+    else {
+      if (width_max < 4) {
+        return F_status_set_error(F_utf);
+      }
+
+      // U+10000 -> U+10FFFF
+      (*character)[0] = F_utf_byte_4_d | ((uint8_t) ((unicode & 0x1c0000) >> 18));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0x3f000) >> 12));
+      (*character)[2] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
+      (*character)[3] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 4;
+    }
+
+    // Clear the unused remainder of the four byte block, as far as width_max permits.
+    for (; used < 4 && used < width_max; ++used) {
+      (*character)[used] = 0;
+    } // for
+
+    return F_none;
+  }
+#endif // _di_f_utf_unicode_from_
+
+#ifndef _di_f_utf_unicode_to_
+  f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1 || !unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // First pack the byte sequence into an f_utf_character_t.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    // Then decode the packed character into the codepoint.
+    return private_f_utf_character_unicode_to(packed, unicode);
+  }
+#endif // _di_f_utf_unicode_to_
+
+#ifndef _di_f_utf_unicode_string_to_
+  f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!string) return F_status_set_error(F_parameter);
+      if (!unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_array_length_t i = 0;
+
+    // Skip any leading NULL placeholders.
+    while (i < length && !string[i]) {
+      ++i;
+    } // while
+
+    // The prefix must start with the character of f_string_ascii_u_s or f_string_ascii_U_s.
+    if (i == length) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    if (string[i] != f_string_ascii_u_s.string[0] && string[i] != f_string_ascii_U_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    do {
+      ++i;
+    } while (i < length && !string[i]);
+
+    // The next non-NULL character must be the character of f_string_ascii_plus_s.
+    if (i == length || string[i] != f_string_ascii_plus_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    // Something must follow the prefix.
+    if (++i == length) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    f_utf_t value = 0;
+
+    for (; i < length; ++i) {
+
+      if (!string[i]) continue;
+
+      // The value never exceeds 0x10ffff at this point, so the multiplication cannot wrap.
+      value *= 16;
+
+      if (string[i] > 0x2f && string[i] < 0x3a) {
+        value += string[i] - 0x30;
+      }
+      else if (string[i] > 0x40 && string[i] < 0x47) {
+        value += (string[i] - 0x41) + 10;
+      }
+      else if (string[i] > 0x60 && string[i] < 0x67) {
+        value += (string[i] - 0x61) + 10;
+      }
+      else {
+        return F_status_set_error(F_valid_not);
+      }
+
+      // Checking per digit (rather than once at the end) prevents a long digit run from wrapping back into range.
+      if (value > 0x10ffff) {
+        return F_status_set_error(F_valid_not);
+      }
+    } // for
+
+    *unicode = value;
+
+    return F_none;
+  }
+#endif // _di_f_utf_unicode_string_to_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "convert" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_convert_h
+#define _F_utf_convert_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Convert a specialized f_utf_character_t type to a uint8_t, stored as a string (character buffer).
+ *
+ * This will also convert an ASCII character stored in utf_character.
+ * This will not resize character.
+ *
+ * @param utf_character
+ * The UTF-8 character to convert from.
+ * @param character
+ * A uint8_t representation of the UTF-8 character, stored as a string of width bytes.
+ * If width_max is 0, then this should be set to 0.
+ * @param width_max
+ * This is set to the max number of bytes available.
+ * This is then updated to represent the max bytes used if enough space is available.
+ * NOTE(review): the implementation in utf/convert.c does not write to width_max; confirm the intended contract.
+ *
+ * @return
+ * F_none if conversion was successful.
+ *
+ * F_failure (with error bit) if width_max is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_to_char_
+ extern f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max);
+#endif // _di_f_utf_character_to_char_
+
+/**
+ * Convert a given (UTF-8) character into Unicode.
+ *
+ * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
+ * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
+ * The Unicode does not need to be interpreted like UTF-8, it is simply a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @param character
+ * The (UTF-8) character.
+ * @param unicode
+ * The Unicode number.
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_unicode_to_
+ extern f_status_t f_utf_character_unicode_to(const f_utf_character_t character, f_utf_t *unicode);
+#endif // _di_f_utf_character_unicode_to_
+
+/**
+ * Convert a given Unicode into (UTF-8) character.
+ *
+ * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
+ * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
+ * The Unicode does not need to be interpreted like UTF-8, it is simply a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @param unicode
+ * The Unicode number.
+ * @param character
+ * The (UTF-8) character.
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_unicode_from_
+ extern f_status_t f_utf_character_unicode_from(const f_utf_t unicode, f_utf_character_t *character);
+#endif // _di_f_utf_character_unicode_from_
+
+/**
+ * Convert a string of the format "U+FFFF" into the codepoint value.
+ *
+ * This ignores NULL characters.
+ * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
+ * The "U+" prefix is required (the implementation returns F_valid_not when it is missing).
+ * Only ASCII characters are allowed to represent the Unicode sequence string.
+ *
+ * @param string
+ * The string representing a Unicode sequence.
+ * @param length
+ * The maximum number of characters.
+ * @param unicode
+ * A 32-bit integer representing the Unicode (such as U+0001).
+ * Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_valid_not (with error bit) if string is not a valid Unicode string.
+ */
+#ifndef _di_f_utf_character_unicode_string_to_
+ extern f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, f_utf_t *unicode);
+#endif // _di_f_utf_character_unicode_string_to_
+
+/**
+ * Convert an ASCII or UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character_t type.
+ *
+ * @param character
+ * The character string to be converted to the f_utf_character_t type.
+ * There must be enough space allocated to convert against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for converting.
+ * Can be anything greater than 0.
+ * @param character_utf
+ * The generated character of type f_utf_character_t.
+ * This value may be cleared, even on error.
+ *
+ * @return
+ * F_none if conversion was successful.
+ *
+ * F_failure (with error bit) if width_max is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_char_to_character_
+ extern f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf);
+#endif // _di_f_utf_char_to_character_
+
+/**
+ * Convert a given Unicode into a string block representing a single character.
+ *
+ * @param unicode
+ * A 32-bit integer representing the Unicode (such as U+0001).
+ * Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ * @param width_max
+ * The max width available for representing the UTF-8 character.
+ * There must be enough space in the character buffer to handle the Unicode width.
+ * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ * @param character
+ * The string (character buffer) that the (UTF-8) bytes of the character are written to.
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character or if width_max is too small for the encoded character.
+ */
+#ifndef _di_f_utf_unicode_from_
+ extern f_status_t f_utf_unicode_from(const f_utf_t unicode, const f_array_length_t width_max, f_string_t *character);
+#endif // _di_f_utf_unicode_from_
+
+/**
+ * Convert a given string block representing a single character into Unicode.
+ *
+ * @param character
+ * The (UTF-8) character, stored as a string (character buffer), to convert to the Unicode representation.
+ * @param width_max
+ * The max width available for representing the UTF-8 character.
+ * There must be enough space in the character buffer to handle the Unicode width.
+ * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ * @param unicode
+ * A 32-bit integer representing the Unicode (such as U+0001).
+ * Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_failure (with error bit) if width_max is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_unicode_to_
+ extern f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, f_utf_t *unicode);
+#endif // _di_f_utf_unicode_to_
+
+/**
+ * Convert a string of the format "U+FFFF" into the codepoint value.
+ *
+ * This ignores NULL characters.
+ * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
+ * The "U+" prefix is required (the implementation returns F_valid_not when it is missing).
+ * Only ASCII characters are allowed to represent the Unicode sequence string.
+ *
+ * @param string
+ * The string representing a Unicode sequence.
+ * @param length
+ * The maximum number of characters.
+ * @param unicode
+ * A 32-bit integer representing the Unicode (such as U+0001).
+ * Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_valid_not (with error bit) if string is not a valid Unicode string or the value is larger than U+10FFFF.
+ */
+#ifndef _di_f_utf_unicode_string_to_
+ extern f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, f_utf_t *unicode);
+#endif // _di_f_utf_unicode_string_to_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_convert_h
#endif // _di_f_utf_string_dynamic_adjust_
/**
- * Resize the dynamic string.
- *
- * @param length
- * The new size to use.
- * @param dynamic
- * The string to resize.
- *
- * @return
- * F_none on success.
- *
- * F_memory_not (with error bit) on out of memory.
- * F_parameter (with error bit) if a parameter is invalid.
- */
-#ifndef _di_f_utf_string_dynamic_adjust_
- extern f_status_t f_utf_string_dynamic_adjust(const f_array_length_t length, f_utf_string_dynamic_t *dynamic);
-#endif // _di_f_utf_string_dynamic_adjust_
-
-/**
* Append the source string onto the destination.
*
* @param source
--- /dev/null
+#include "../utf.h"
+#include "../private-utf.h"
+#include "private-is_unassigned.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_is_
+  f_status_t f_utf_is(const f_string_t character) {
+
+    // The width macro yields 0 for an ASCII byte, so the result is non-zero only for a UTF-8 byte.
+    const f_status_t width = macro_f_utf_byte_width_is(*character);
+
+    return width;
+  }
+#endif // _di_f_utf_is_
+
+#ifndef _di_f_utf_is_alpha_
+  f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A width of 0 designates an ASCII byte, which the C library classifies.
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return isalpha(*character) ? F_true : F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_alpha(packed);
+  }
+#endif // _di_f_utf_is_alpha_
+
+#ifndef _di_f_utf_is_alpha_digit_
+  f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A width of 0 designates an ASCII byte, which the C library classifies.
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return isalnum(*character) ? F_true : F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_alpha_digit(packed);
+  }
+#endif // _di_f_utf_is_alpha_digit_
+
+#ifndef _di_f_utf_is_alpha_numeric_
+  f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A width of 0 designates an ASCII byte, which the C library classifies.
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return isalnum(*character) ? F_true : F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_alpha_numeric(packed);
+  }
+#endif // _di_f_utf_is_alpha_numeric_
+
+#ifndef _di_f_utf_is_ascii_
+  f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A width of 0 designates an ASCII byte.
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return F_true;
+    }
+
+    // The sequence is still validated for length and fragments before reporting that it is not ASCII.
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_ascii_
+
+#ifndef _di_f_utf_is_combining_
+  f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII combining characters (a width of 0 designates ASCII).
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_combining(packed);
+  }
+#endif // _di_f_utf_is_combining_
+
+#ifndef _di_f_utf_is_control_
+  f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_control(character_utf);
+    }
+
+    // iscntrl() only promises "non-zero" for a match, so map it onto the status codes like the sibling functions do.
+    if (iscntrl(*character)) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_control_
+
+#ifndef _di_f_utf_is_control_code_
+  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A non-zero width designates a UTF-8 sequence rather than an ASCII byte.
+    if (macro_f_utf_byte_width_is(*character)) {
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_control_code(character_utf);
+    }
+
+    if (iscntrl(*character)) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_control_code_
+
+#ifndef _di_f_utf_is_control_format_
+  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII control formats (a width of 0 designates ASCII).
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_control_format(packed);
+  }
+#endif // _di_f_utf_is_control_format_
+
+#ifndef _di_f_utf_is_control_picture_
+  f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII control pictures (a width of 0 designates ASCII).
+    if (!macro_f_utf_byte_width_is(*character)) {
+      return F_false;
+    }
+
+    if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+    if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+    // Only three-byte sequences are passed on to the classifier.
+    if (macro_f_utf_byte_width_is(*character) != 3) return F_false;
+
+    // Pack the multi-byte sequence and let the private classifier decide.
+    f_utf_character_t packed = 0;
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &packed);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    return private_f_utf_character_is_control_picture(packed);
+  }
+#endif // _di_f_utf_is_control_picture_
+
+#ifndef _di_f_utf_is_digit_
+ f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // A zero byte width designates the ASCII case, which is delegated to isdigit().
+   if (!macro_f_utf_byte_width_is(*character)) {
+     return isdigit(*character) ? F_true : F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_digit(unicode);
+ }
+#endif // _di_f_utf_is_digit_
+
+#ifndef _di_f_utf_is_emoji_
+ f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII case: decimal digits are reported as emoji.
+   // NOTE(review): presumably because the digits may act as emoji keycap bases; '#' and '*' are not reported, confirm this is intended.
+   if (!macro_f_utf_byte_width_is(*character)) {
+     return isdigit(*character) ? F_true : F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_emoji(unicode);
+ }
+#endif // _di_f_utf_is_emoji_
+
+#ifndef _di_f_utf_is_fragment_
+ f_status_t f_utf_is_fragment(const f_string_t character) {
+
+   // Only the first byte is inspected: a byte width of exactly 1 designates a fragment.
+   return (macro_f_utf_byte_width_is(*character) == 1) ? F_true : F_false;
+ }
+#endif // _di_f_utf_is_fragment_
+
+#ifndef _di_f_utf_is_graph_
+ f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // A zero byte width designates the ASCII case, which is delegated to isgraph().
+   if (!macro_f_utf_byte_width_is(*character)) {
+     return isgraph(*character) ? F_true : F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   // Control, whitespace, and zero-width characters are all treated as non-graph (checked in this order).
+   if (private_f_utf_character_is_control(unicode) || private_f_utf_character_is_whitespace(unicode) || private_f_utf_character_is_zero_width(unicode)) {
+     return F_false;
+   }
+
+   return F_true;
+ }
+#endif // _di_f_utf_is_graph_
+
+#ifndef _di_f_utf_is_numeric_
+ f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // A zero byte width designates the ASCII case, which is delegated to isdigit().
+   if (!macro_f_utf_byte_width_is(*character)) {
+     return isdigit(*character) ? F_true : F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_numeric(unicode);
+ }
+#endif // _di_f_utf_is_numeric_
+
+#ifndef _di_f_utf_is_phonetic_
+ f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // There are no ASCII phonetic characters.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_phonetic(unicode);
+ }
+#endif // _di_f_utf_is_phonetic_
+
+#ifndef _di_f_utf_is_private_
+ f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // There are no ASCII private characters.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_private(unicode);
+ }
+#endif // _di_f_utf_is_private_
+
+#ifndef _di_f_utf_is_punctuation_
+ f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
+   // Determine whether the ASCII or UTF-8 character at the start of "character" is punctuation.
+   //
+   // character: the byte sequence to check, with at most width_max bytes available.
+   // width_max: the number of bytes available, must be greater than 0.
+   //
+   // Returns F_true or F_false for ASCII, otherwise the result of private_f_utf_character_is_punctuation().
+   // Returns (with error bit) F_parameter when width_max is 0, F_failure when the sequence is wider than
+   // width_max, F_utf_fragment when the byte width is 1, or any error from the conversion.
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   if (macro_f_utf_byte_width_is(*character)) {
+     // The multi-byte sequence must fit within the width made available by the caller.
+     if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+     // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+     if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+     f_utf_character_t unicode = 0;
+     const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+     if (F_status_is_error(status)) return status;
+
+     return private_f_utf_character_is_punctuation(unicode);
+   }
+
+   // The ASCII ranges below use inclusive bounds so that each range reads exactly as its comment.
+
+   // ASCII: '!' to '#'.
+   if (character[0] >= 0x21 && character[0] <= 0x23) return F_true;
+
+   // ASCII: '%' to '*'.
+   if (character[0] >= 0x25 && character[0] <= 0x2a) return F_true;
+
+   // ASCII: ',' to '/'.
+   if (character[0] >= 0x2c && character[0] <= 0x2f) return F_true;
+
+   // ASCII: ':', ';', '?', or '@'.
+   if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) return F_true;
+
+   // ASCII: '[' to ']' (the upper bound previously stopped at '\', leaving out ']').
+   if (character[0] >= 0x5b && character[0] <= 0x5d) return F_true;
+
+   // ASCII: '_', '{', or '}'.
+   if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) return F_true;
+
+   return F_false;
+ }
+#endif // _di_f_utf_is_punctuation_
+
+#ifndef _di_f_utf_is_symbol_
+ f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
+   // Determine whether the ASCII or UTF-8 character at the start of "character" is a symbol.
+   //
+   // character: the byte sequence to check, with at most width_max bytes available.
+   // width_max: the number of bytes available, must be greater than 0.
+   //
+   // Returns F_true or F_false for ASCII, otherwise the result of private_f_utf_character_is_symbol().
+   // Returns (with error bit) F_parameter when width_max is 0, F_failure when the sequence is wider than
+   // width_max, F_utf_fragment when the byte width is 1, or any error from the conversion.
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   if (macro_f_utf_byte_width_is(*character)) {
+     // The multi-byte sequence must fit within the width made available by the caller.
+     if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+     // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+     if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+     f_utf_character_t unicode = 0;
+     const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+     if (F_status_is_error(status)) return status;
+
+     return private_f_utf_character_is_symbol(unicode);
+   }
+
+   // ASCII: '$' or '+'.
+   if (character[0] == 0x24 || character[0] == 0x2b) return F_true;
+
+   // ASCII: '<' to '>' (inclusive bounds; the exclusive bounds used before only matched '=').
+   if (character[0] >= 0x3c && character[0] <= 0x3e) return F_true;
+
+   // ASCII: '^', '`', '|', or '~'.
+   if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) return F_true;
+
+   return F_false;
+ }
+#endif // _di_f_utf_is_symbol_
+
+#ifndef _di_f_utf_is_surrogate_
+ f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII are never surrogate.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_surrogate(unicode);
+ }
+#endif // _di_f_utf_is_surrogate_
+
+#ifndef _di_f_utf_is_unassigned_
+ f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII are never unassigned.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_unassigned(unicode);
+ }
+#endif // _di_f_utf_is_unassigned_
+
+#ifndef _di_f_utf_is_valid_
+ f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII are valid.
+   if (!macro_f_utf_byte_width_is(*character)) return F_true;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_valid(unicode);
+ }
+#endif // _di_f_utf_is_valid_
+
+#ifndef _di_f_utf_is_whitespace_
+ f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // A zero byte width designates the ASCII case, which is delegated to isspace().
+   if (!macro_f_utf_byte_width_is(*character)) {
+     return isspace(*character) ? F_true : F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_whitespace(unicode);
+ }
+#endif // _di_f_utf_is_whitespace_
+
+#ifndef _di_f_utf_is_whitespace_modifier_
+ f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // There are no ASCII whitespace modifiers.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_whitespace_modifier(unicode);
+ }
+#endif // _di_f_utf_is_whitespace_modifier_
+
+#ifndef _di_f_utf_is_whitespace_other_
+ f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // There are no ASCII whitespace other.
+   if (!macro_f_utf_byte_width_is(*character)) return F_false;
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_whitespace_other(unicode);
+ }
+#endif // _di_f_utf_is_whitespace_other_
+
+#ifndef _di_f_utf_is_wide_
+ f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
+
+ if (macro_f_utf_byte_width_is(*character)) {
+ if (macro_f_utf_byte_width_is(*character) > width_max) {
+ return F_status_set_error(F_failure);
+ }
+
+ if (macro_f_utf_byte_width_is(*character) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+
+ f_utf_character_t character_utf = 0;
+
+ {
+ const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+ if (F_status_is_error(status)) return status;
+ }
+
+ return private_f_utf_character_is_wide(character_utf);
+ }
+
+ // There are no wide ASCII characters.
+ return F_false;
+ }
+#endif // _di_f_utf_is_wide_
+
+#ifndef _di_f_utf_is_word_
+ f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII case: alphabet, digit, and underscore characters are word characters ("strict" is not consulted here).
+   if (!macro_f_utf_byte_width_is(*character)) {
+     if (isalnum(*character)) return F_true;
+     if (*character == f_string_ascii_underscore_s.string[0]) return F_true;
+
+     return F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_word(unicode, strict);
+ }
+#endif // _di_f_utf_is_word_
+
+#ifndef _di_f_utf_is_word_dash_
+ f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII case: alphabet, digit, underscore, and minus characters are accepted ("strict" is not consulted here).
+   if (!macro_f_utf_byte_width_is(*character)) {
+     if (isalnum(*character)) return F_true;
+     if (*character == f_string_ascii_underscore_s.string[0]) return F_true;
+     if (*character == f_string_ascii_minus_s.string[0]) return F_true;
+
+     return F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_word_dash(unicode, strict);
+ }
+#endif // _di_f_utf_is_word_dash_
+
+#ifndef _di_f_utf_is_word_dash_plus_
+ f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII case: alphabet, digit, underscore, minus, and plus characters are accepted ("strict" is not consulted here).
+   if (!macro_f_utf_byte_width_is(*character)) {
+     if (isalnum(*character)) return F_true;
+     if (*character == f_string_ascii_underscore_s.string[0]) return F_true;
+     if (*character == f_string_ascii_minus_s.string[0]) return F_true;
+     if (*character == f_string_ascii_plus_s.string[0]) return F_true;
+
+     return F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_word_dash_plus(unicode, strict);
+ }
+#endif // _di_f_utf_is_word_dash_plus_
+
+#ifndef _di_f_utf_is_zero_width_
+ f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
+   #ifndef _di_level_0_parameter_checking_
+     if (width_max < 1) return F_status_set_error(F_parameter);
+   #endif // _di_level_0_parameter_checking_
+
+   // ASCII case: these control characters are considered zero-width spaces.
+   // The codes 0x09 through 0x0b fall between the two ranges and are therefore not zero-width.
+   if (!macro_f_utf_byte_width_is(*character)) {
+     if ((*character >= 0x00 && *character <= 0x08) || (*character >= 0x0c && *character <= 0x1f) || *character == 0x7f) {
+       return F_true;
+     }
+
+     return F_false;
+   }
+
+   // The multi-byte sequence must fit within the width made available by the caller.
+   if (macro_f_utf_byte_width_is(*character) > width_max) return F_status_set_error(F_failure);
+
+   // A byte width of 1 is reported as an incomplete UTF-8 fragment.
+   if (macro_f_utf_byte_width_is(*character) == 1) return F_status_set_error(F_utf_fragment);
+
+   f_utf_character_t unicode = 0;
+   const f_status_t status = private_f_utf_char_to_character(character, width_max, &unicode);
+
+   if (F_status_is_error(status)) return status;
+
+   return private_f_utf_character_is_zero_width(unicode);
+ }
+#endif // _di_f_utf_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "is" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_is_h
+#define _F_utf_is_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
+ *
+ * This does not check the validity of the character, for that instead use f_utf_is_valid().
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ *
+ * @return
+ * F_true if a UTF-8 character.
+ * F_false if not a UTF-8 character.
+ */
+#ifndef _di_f_utf_is_
+ extern f_status_t f_utf_is(const f_string_t character);
+#endif // _di_f_utf_is_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 alphabet character.
+ * F_false if not a UTF-8 alphabet character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalpha()
+ */
+#ifndef _di_f_utf_is_alpha_
+ extern f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 alphabet or digit character.
+ * F_false if not a UTF-8 alphabet or digit character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_alpha_digit_
+ extern f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 alpha-numeric character.
+ * F_false if not a UTF-8 alpha-numeric character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_alpha_numeric_
+ extern f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if an ASCII character.
+ * F_false if not an ASCII character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_ascii_
+ extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_ascii_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 combining character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 combining character.
+ * F_false if not a UTF-8 combining character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_combining_
+ extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_combining_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
+ *
+ * This includes control code and control format characters.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control character.
+ * F_false if not a UTF-8 control character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_is_control_
+ extern f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control code character.
+ * F_false if not a UTF-8 control code character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_code_
+ extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control format character.
+ * F_false if not a UTF-8 control format character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_format_
+ extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_format_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
+ *
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control picture character.
+ * F_false if not a UTF-8 control picture character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_picture_
+ extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_picture_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 digit character.
+ * F_false if not a UTF-8 digit character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_digit_
+ extern f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 emoji character.
+ * F_false if not a UTF-8 emoji character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_emoji_
+ extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_emoji_
+
+/**
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
+ *
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
+ *
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
+ *
+ * According to rfc3629, the valid octet sequences for UTF-8 are:
+ * UTF8-octets = *( UTF8-char )
+ * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+ * UTF8-1 = %x00-7F
+ * UTF8-2 = %xC2-DF UTF8-tail
+ * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+ * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+ * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+ * %xF4 %x80-8F 2( UTF8-tail )
+ * UTF8-tail = %x80-BF
+ *
+ * @param character
+ * The character to validate.
+ * Only the first byte is inspected, so at least one byte must be available.
+ *
+ * @return
+ * F_true if a 1-width UTF-8 character fragment.
+ * F_false if not a 1-width UTF-8 character fragment.
+ */
+#ifndef _di_f_utf_is_fragment_
+ extern f_status_t f_utf_is_fragment(const f_string_t character);
+#endif // _di_f_utf_is_fragment_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 graph.
+ * F_false if not a UTF-8 graph.
+ *
+ * F_maybe (with error bit) if this could be a graph but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isgraph()
+ */
+#ifndef _di_f_utf_is_graph_
+ extern f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_graph_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 numeric character.
+ * F_false if not a UTF-8 numeric character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_numeric_
+ extern f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 phonetic character.
+ * F_false if not a UTF-8 phonetic character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_phonetic_
+ extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_phonetic_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 private character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 private character.
+ * F_false if not a UTF-8 private character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_private_
+ extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_private_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 punctuation character.
+ * F_false if not a UTF-8 punctuation character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_punctuation_
+ extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_punctuation_
+
+/**
+ * Check to see if the entire byte block of the character is a surrogate UTF-8 character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 surrogate character.
+ * F_false if not a UTF-8 surrogate character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_surrogate_
+ extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_surrogate_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 symbol character.
+ * F_false if not a UTF-8 symbol character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_symbol_
+ extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_symbol_
+
+/**
+ * Check to see if the entire byte block of the character is an unassigned UTF-8 character.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if an unassigned UTF-8 character.
+ * F_false if not an unassigned UTF-8 character.
+ *
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_unassigned_
+ extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_unassigned_
+
+/**
+ * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
+ *
+ * This does validate if the UTF-8 character is a valid UTF-8 character.
+ * To not do this, use f_utf_is().
+ *
+ * Valid ASCII character codes are considered valid by this function.
+ *
+ * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a valid UTF-8 character or is an ASCII character.
+ * F_false if not a valid UTF-8 character.
+ *
+ * F_failure (with error bit) if width_max is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_valid_
+ extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_valid_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
+ *
+ * Non-printing or zero-width characters are not considered whitespace.
+ * This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not white space.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 whitespace.
+ * F_false if not a UTF-8 whitespace.
+ *
+ * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_is_whitespace_
+ extern f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 whitespace modifier character.
+ *
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 whitespace modifier.
+ * F_false if not a UTF-8 whitespace modifier.
+ *
+ * F_maybe (with error bit) if this could be a whitespace modifier but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_whitespace_modifier_
+ extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_modifier_
+
+/**
+ * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
+ *
+ * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 whitespace.
+ * F_false if not a UTF-8 whitespace.
+ *
+ * F_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_whitespace_other_
+ extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_other_
+
+/**
+ * Get whether or not the UTF-8 character is a wide character on display.
+ *
+ * This is not "wide" as in the width in bytes that the codepoint takes up in UTF-8.
+ * Instead, this is the width in characters on the screen the character takes up.
+ * "Wide" characters take up 2 characters on render.
+ * "Narrow" characters take up 1 character on render.
+ *
+ * @param character
+ * The (UTF-8) character.
+ * @param width_max
+ * The max width available for representing the UTF-8 character.
+ * There must be enough space in the character buffer to handle the Unicode width.
+ * It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ * This is the width in bytes the codepoint takes up in UTF-8.
+ *
+ * @return
+ * F_none on success.
+ *
+ * F_failure (with error bit) if width_max is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_wide_
+ extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_wide_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-digit or an underscore '_'.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word character.
+ * F_false if not a UTF-8 word character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_
+ extern f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word or dash character.
+ * F_false if not a UTF-8 word or dash character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_dash_
+ extern f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_dash_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
+ *
+ * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word, dash, or plus character.
+ * F_false if not a UTF-8 word, dash, or plus character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_dash_plus_
+ extern f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_dash_plus_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
+ *
+ * Only characters that do not print, which are generally called zero-width.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 zero-width character.
+ * F_false if not a UTF-8 zero-width character.
+ *
+ * F_maybe (with error bit) if this could be a zero-width character but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_zero_width_
+ extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_is_h
--- /dev/null
+#include "../utf.h"
+#include "../private-utf.h"
+#include "private-is_unassigned.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_character_is_
+  f_status_t f_utf_character_is(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, which is not reported as a UTF-8 character here.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a fragment, which is reported without the error bit.
+    return (macro_f_utf_character_t_width_is(character) == 1) ? F_utf_fragment : F_true;
+  }
+#endif // _di_f_utf_character_is_
+
+#ifndef _di_f_utf_character_is_alpha_
+  f_status_t f_utf_character_is_alpha(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalpha(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_alpha(character);
+  }
+#endif // _di_f_utf_character_is_alpha_
+
+#ifndef _di_f_utf_character_is_alpha_digit_
+  f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalnum(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_alpha_digit(character);
+  }
+#endif // _di_f_utf_character_is_alpha_digit_
+
+#ifndef _di_f_utf_character_is_alpha_numeric_
+  f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalnum(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_alpha_numeric(character);
+  }
+#endif // _di_f_utf_character_is_alpha_numeric_
+
+#ifndef _di_f_utf_character_is_ascii_
+  f_status_t f_utf_character_is_ascii(const f_utf_character_t character) {
+
+    // Any non-zero width means the character is not ASCII.
+    return macro_f_utf_character_t_width_is(character) ? F_false : F_true;
+  }
+#endif // _di_f_utf_character_is_ascii_
+
+#ifndef _di_f_utf_character_is_combining_
+  f_status_t f_utf_character_is_combining(const f_utf_character_t character) {
+
+    // There are no combining characters in ASCII (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_combining(character);
+  }
+#endif // _di_f_utf_character_is_combining_
+
+#ifndef _di_f_utf_character_is_control_
+  f_status_t f_utf_character_is_control(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return iscntrl(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_control(character);
+  }
+#endif // _di_f_utf_character_is_control_
+
+#ifndef _di_f_utf_character_is_control_code_
+  f_status_t f_utf_character_is_control_code(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return iscntrl(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_control_code(character);
+  }
+#endif // _di_f_utf_character_is_control_code_
+
+#ifndef _di_f_utf_character_is_control_format_
+  // Determine whether the character is a UTF-8 control format character.
+  //
+  // Returns F_false for ASCII, F_utf_fragment (with error bit) for a fragment,
+  // and otherwise whatever private_f_utf_character_is_control_format() returns.
+  //
+  // The guard and the function name were corrected: the guard previously reused the
+  // control_picture guard and the function name was missing the f_utf_ prefix.
+  f_status_t f_utf_character_is_control_format(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_control_format(character);
+    }
+
+    // There are no control format characters in ASCII.
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_control_format_
+
+#ifndef _di_f_utf_character_is_control_picture_
+  f_status_t f_utf_character_is_control_picture(const f_utf_character_t character) {
+
+    // There are no control picture characters in ASCII (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_control_picture(character);
+  }
+#endif // _di_f_utf_character_is_control_picture_
+
+#ifndef _di_f_utf_character_is_digit_
+  f_status_t f_utf_character_is_digit(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_digit(character);
+  }
+#endif // _di_f_utf_character_is_digit_
+
+#ifndef _di_f_utf_character_is_emoji_
+  f_status_t f_utf_character_is_emoji(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII.
+    // NOTE(review): the ASCII case tests isdigit(), the same test used by f_utf_character_is_digit(); confirm that ASCII digits are intentionally reported as emoji.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_emoji(character);
+  }
+#endif // _di_f_utf_character_is_emoji_
+
+#ifndef _di_f_utf_character_is_fragment_
+ f_status_t f_utf_character_is_fragment(const f_utf_character_t character) {
+
+ // A width of 1 designates a UTF-8 fragment.
+ // The comparison result (1 or 0) is returned directly as the status.
+ // NOTE(review): presumably relies on F_true being 1 and F_false being 0 — confirm.
+ return macro_f_utf_character_t_width_is(character) == 1;
+ }
+#endif // _di_f_utf_character_is_fragment_
+
+#ifndef _di_f_utf_character_is_graph_
+  f_status_t f_utf_character_is_graph(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isgraph(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Control, whitespace, and zero-width characters are not graph characters.
+    // The checks run in this order and any non-zero result from them yields F_false.
+    if (private_f_utf_character_is_control(character) || private_f_utf_character_is_whitespace(character) || private_f_utf_character_is_zero_width(character)) {
+      return F_false;
+    }
+
+    return F_true;
+  }
+#endif // _di_f_utf_character_is_graph_
+
+#ifndef _di_f_utf_character_is_numeric_
+  f_status_t f_utf_character_is_numeric(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, where only the decimal digits are numeric.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_numeric(character);
+  }
+#endif // _di_f_utf_character_is_numeric_
+
+#ifndef _di_f_utf_character_is_phonetic_
+  f_status_t f_utf_character_is_phonetic(const f_utf_character_t character) {
+
+    // There are no ASCII phonetic characters (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_phonetic(character);
+  }
+#endif // _di_f_utf_character_is_phonetic_
+
+#ifndef _di_f_utf_character_is_private_
+  f_status_t f_utf_character_is_private(const f_utf_character_t character) {
+
+    // There are no ASCII private characters (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_private(character);
+  }
+#endif // _di_f_utf_character_is_private_
+
+#ifndef _di_f_utf_character_is_punctuation_
+  // Determine whether the character is an ASCII or UTF-8 punctuation character.
+  //
+  // Returns F_utf_fragment (with error bit) for a fragment and the result of
+  // private_f_utf_character_is_punctuation() for any other non-ASCII character.
+  // For ASCII, returns F_true for the punctuation listed below and F_false otherwise.
+  //
+  // The ASCII comparisons are made against the character with the ASCII byte in the
+  // most significant byte (for example '!' is 0x21000000).
+  f_status_t f_utf_character_is_punctuation(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_punctuation(character);
+    }
+
+    // ASCII: '!' to '#'.
+    if (character > 0x20000000 && character < 0x24000000) {
+      return F_true;
+    }
+
+    // ASCII: '%' to '*'.
+    if (character > 0x24000000 && character < 0x2b000000) {
+      return F_true;
+    }
+
+    // ASCII: ',' to '/'.
+    if (character > 0x2b000000 && character < 0x30000000) {
+      return F_true;
+    }
+
+    // ASCII: ':', ';', '?', or '@'.
+    if (character == 0x3a000000 || character == 0x3b000000 || character == 0x3f000000 || character == 0x40000000) {
+      return F_true;
+    }
+
+    // ASCII: '[' to ']' (0x5b to 0x5d); the upper bound is exclusive, so it must be 0x5e to include ']'.
+    if (character > 0x5a000000 && character < 0x5e000000) {
+      return F_true;
+    }
+
+    // ASCII: '_', '{', or '}'.
+    if (character == 0x5f000000 || character == 0x7b000000 || character == 0x7d000000) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_punctuation_
+
+#ifndef _di_f_utf_character_is_symbol_
+  // Determine whether the character is an ASCII or UTF-8 symbol character.
+  //
+  // Returns F_utf_fragment (with error bit) for a fragment and the result of
+  // private_f_utf_character_is_symbol() for any other non-ASCII character.
+  // For ASCII, returns F_true for the symbols listed below and F_false otherwise.
+  //
+  // The ASCII comparisons are made against the character with the ASCII byte in the
+  // most significant byte (for example '$' is 0x24000000).
+  f_status_t f_utf_character_is_symbol(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_symbol(character);
+    }
+
+    // ASCII: '$' or '+'.
+    if (character == 0x24000000 || character == 0x2b000000) {
+      return F_true;
+    }
+
+    // ASCII: '<' to '>' (0x3c to 0x3e); both bounds are exclusive, so they sit one step outside the range.
+    if (character > 0x3b000000 && character < 0x3f000000) {
+      return F_true;
+    }
+
+    // ASCII: '^', '`', '|', or '~'.
+    if (character == 0x5e000000 || character == 0x60000000 || character == 0x7c000000 || character == 0x7e000000) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_symbol_
+
+#ifndef _di_f_utf_character_is_unassigned_
+  f_status_t f_utf_character_is_unassigned(const f_utf_character_t character) {
+
+    // ASCII (a width of 0) is never reported as unassigned.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_unassigned(character);
+  }
+#endif // _di_f_utf_character_is_unassigned_
+
+#ifndef _di_f_utf_character_is_valid_
+  f_status_t f_utf_character_is_valid(const f_utf_character_t character) {
+
+    // ASCII (a width of 0) is always reported as valid.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_true;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be validated.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_valid(character);
+  }
+#endif // _di_f_utf_character_is_valid_
+
+#ifndef _di_f_utf_character_is_whitespace_
+  f_status_t f_utf_character_is_whitespace(const f_utf_character_t character) {
+
+    // A width of 0 designates ASCII, so defer to the standard C classification.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isspace(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_whitespace(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_
+
+#ifndef _di_f_utf_character_is_whitespace_modifier_
+  f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character) {
+
+    // There are no ASCII whitespace modifiers (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_whitespace_modifier(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_modifier_
+
+#ifndef _di_f_utf_character_is_whitespace_other_
+  f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character) {
+
+    // There are no ASCII whitespace other (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_whitespace_other(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_other_
+
+#ifndef _di_f_utf_character_is_wide_
+  f_status_t f_utf_character_is_wide(const f_utf_character_t character) {
+
+    // There are no wide ASCII characters (a width of 0).
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 designates a UTF-8 fragment, which cannot be classified.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Everything else is handed to the private implementation.
+    return private_f_utf_character_is_wide(character);
+  }
+#endif // _di_f_utf_character_is_wide_
+
+#ifndef _di_f_utf_character_is_word_
+  // Determine whether the character is an ASCII or UTF-8 word character.
+  //
+  // Returns F_utf_fragment (with error bit) for a fragment and the result of
+  // private_f_utf_character_is_word() (which receives strict) for any other non-ASCII character.
+  // For ASCII, returns F_true for alpha-digit characters and the underscore, F_false otherwise.
+  f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_word(character, strict);
+    }
+
+    // Compare the extracted ASCII byte rather than the entire character, because the
+    // one-byte string constant cannot equal a character whose ASCII byte is not the lowest byte.
+    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_underscore_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_
+
+#ifndef _di_f_utf_character_is_word_dash_
+  // Determine whether the character is an ASCII or UTF-8 word or dash character.
+  //
+  // Returns F_utf_fragment (with error bit) for a fragment and the result of
+  // private_f_utf_character_is_word_dash() (which receives strict) for any other non-ASCII character.
+  // For ASCII, returns F_true for alpha-digit characters, the underscore, and the minus, F_false otherwise.
+  f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_word_dash(character, strict);
+    }
+
+    // Compare the extracted ASCII byte rather than the entire character, because the
+    // one-byte string constants cannot equal a character whose ASCII byte is not the lowest byte.
+    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_underscore_s.string[0] || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_minus_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_dash_
+
+#ifndef _di_f_utf_character_is_word_dash_plus_
+  // Determine whether the character is an ASCII or UTF-8 word, dash, or plus character.
+  //
+  // Returns F_utf_fragment (with error bit) for a fragment and the result of
+  // private_f_utf_character_is_word_dash_plus() (which receives strict) for any other non-ASCII character.
+  // For ASCII, returns F_true for alpha-digit characters, the underscore, the minus, and the plus, F_false otherwise.
+  f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_word_dash_plus(character, strict);
+    }
+
+    // Compare the extracted ASCII byte rather than the entire character, because the
+    // one-byte string constants cannot equal a character whose ASCII byte is not the lowest byte.
+    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_underscore_s.string[0] || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_minus_s.string[0] || macro_f_utf_character_t_to_char_1(character) == f_string_ascii_plus_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_dash_plus_
+
+#ifndef _di_f_utf_character_is_zero_width_
+  // Reports whether the given character is a non-printing (zero-width) character.
+  f_status_t f_utf_character_is_zero_width(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of exactly 1 is only a fragment; anything wider is handed to the private implementation.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_zero_width(character);
+    }
+
+    const uint8_t byte = macro_f_utf_character_t_to_char_1(character);
+
+    // The control characters 0x00-0x08, 0x0a, 0x0c-0x1f, and 0x7f are considered zero-width spaces.
+    // The byte is unsigned, so the first range needs no lower bound check.
+    if (byte <= 0x08 || byte == 0x0a || (byte >= 0x0c && byte <= 0x1f) || byte == 0x7f) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "character_is" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_is_character_h
+#define _F_utf_is_character_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
+ *
+ * This does not validate if the UTF-8 character is a valid UTF-8 character, for that use f_utf_character_is_valid().
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 character.
+ * F_false if not a UTF-8 character.
+ * F_utf_fragment if this is a UTF-8 character fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_is_
+ extern f_status_t f_utf_character_is(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 alphabet character.
+ * F_false if not a UTF-8 alphabet character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalpha()
+ */
+#ifndef _di_f_utf_character_is_alpha_
+ extern f_status_t f_utf_character_is_alpha(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 alpha-digit character.
+ * F_false if not a UTF-8 alpha-digit character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_alpha_digit_
+ extern f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 alpha-numeric character.
+ * F_false if not a UTF-8 alpha-numeric character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_alpha_numeric_
+ extern f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII character.
+ *
+ * This does not validate whether the UTF-8 character is valid or not.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if an ASCII character.
+ * F_false if not an ASCII character.
+ */
+#ifndef _di_f_utf_character_is_ascii_
+ extern f_status_t f_utf_character_is_ascii(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_ascii_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 combining character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 combining character.
+ * F_false if not a UTF-8 combining character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_combining_
+ extern f_status_t f_utf_character_is_combining(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_combining_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
+ *
+ * This includes control code and control format characters.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control character.
+ * F_false if not a UTF-8 control character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_
+ extern f_status_t f_utf_character_is_control(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control code character.
+ * F_false if not a UTF-8 control code character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_code_
+ extern f_status_t f_utf_character_is_control_code(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control format character.
+ * F_false if not a UTF-8 control format character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_control_format_
+ extern f_status_t f_utf_character_is_control_format(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_format_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
+ *
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control picture character.
+ * F_false if not a UTF-8 control picture character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_control_picture_
+ extern f_status_t f_utf_character_is_control_picture(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_picture_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 digit character.
+ * F_false if not a UTF-8 digit character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_digit_
+ extern f_status_t f_utf_character_is_digit(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 emoji character.
+ * F_false if not a UTF-8 emoji character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_emoji_
+ extern f_status_t f_utf_character_is_emoji(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_emoji_
+
+/**
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
+ *
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
+ *
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
+ *
+ * According to rfc3629, the valid octet sequences for UTF-8 are:
+ * UTF8-octets = *( UTF8-char )
+ * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+ * UTF8-1 = %x00-7F
+ * UTF8-2 = %xC2-DF UTF8-tail
+ * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+ * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+ * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+ * %xF4 %x80-8F 2( UTF8-tail )
+ * UTF8-tail = %x80-BF
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 character.
+ * F_false if not a UTF-8 character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_is_fragment_
+ extern f_status_t f_utf_character_is_fragment(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_fragment_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 graph.
+ * F_false if not a UTF-8 graph.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isgraph()
+ */
+#ifndef _di_f_utf_character_is_graph_
+ extern f_status_t f_utf_character_is_graph(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_graph_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 numeric character.
+ * F_false if not a UTF-8 numeric character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_numeric_
+ extern f_status_t f_utf_character_is_numeric(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 phonetic character.
+ * F_false if not a UTF-8 phonetic character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_phonetic_
+ extern f_status_t f_utf_character_is_phonetic(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_phonetic_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 private character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 private character.
+ * F_false if not a UTF-8 private character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_private_
+ extern f_status_t f_utf_character_is_private(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_private_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 punctuation character.
+ * F_false if not a UTF-8 punctuation character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_punctuation_
+ extern f_status_t f_utf_character_is_punctuation(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_punctuation_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 symbol character.
+ * F_false if not a UTF-8 symbol character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_symbol_
+ extern f_status_t f_utf_character_is_symbol(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_symbol_
+
+/**
+ * Check to see if the entire byte block of the character is an unassigned (well-formed) UTF-8 character.
+ *
+ * The Surrogates and Private Use are not considered unassigned.
+ *
+ * This does check whether the UTF-8 character is an unassigned UTF-8 character.
+ * To not do this, use f_utf_character_is().
+ *
+ * @param character
+ * The character to check.
+ *
+ * @return
+ * F_true if a UTF-8 unassigned character.
+ * F_false if not a UTF-8 unassigned character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_fragment()
+ */
+#ifndef _di_f_utf_character_is_unassigned_
+ extern f_status_t f_utf_character_is_unassigned(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_unassigned_
+
+/**
+ * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
+ *
+ * This does validate if the UTF-8 character is a valid UTF-8 character.
+ * To not do this, use f_utf_character_is().
+ *
+ * ASCII character codes are considered valid by this function.
+ *
+ * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 character.
+ * F_false if not a UTF-8 character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_fragment()
+ */
+#ifndef _di_f_utf_character_is_valid_
+ extern f_status_t f_utf_character_is_valid(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_valid_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
+ *
+ * Non-printing or zero-width characters are not considered whitespace.
+ * This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not white space.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 whitespace.
+ * F_false if not a UTF-8 whitespace.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_character_is_whitespace_
+ extern f_status_t f_utf_character_is_whitespace(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 whitespace modifier character.
+ *
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 modifier character.
+ * F_false if not a UTF-8 modifier character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_whitespace_modifier_
+ extern f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_modifier_
+
+/**
+ * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
+ *
+ * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 (other) whitespace.
+ * F_false if not a UTF-8 (other) whitespace.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_character_is_whitespace_other_
+ extern f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_other_
+
+/**
+ * Get whether or not the UTF-8 character is a wide character on display.
+ *
+ * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
+ * Instead, this is the width in characters on the screen the character takes up.
+ * "Wide" characters take up 2 characters on render.
+ * "Narrow" characters take up 1 character on render.
+ *
+ * @param character
+ * The (UTF-8) character.
+ *
+ * @return
+ * F_true if a wide character.
+ * F_false if not a wide character.
+ *
+ * F_failure (with error bit) if width is not long enough to convert.
+ * F_parameter (with error bit) if a parameter is invalid.
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_wide_
+ extern f_status_t f_utf_character_is_wide(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_wide_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-numeric or an underscore '_'.
+ *
+ * @param character
+ * The character to validate.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word character.
+ * F_false if not a UTF-8 word character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_
+ extern f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * @param character
+ * The character to validate.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word or dash character.
+ * F_false if not a UTF-8 word or dash character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_dash_
+ extern f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_dash_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
+ *
+ * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
+ *
+ * @param character
+ * The character to validate.
+ * @param strict
+ * When TRUE, include all appropriate characters by type as per Unicode.
+ * When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ * When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ * F_true if a UTF-8 word, dash, or plus character.
+ * F_false if not a UTF-8 word, dash, or plus character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_dash_plus_
+ extern f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_dash_plus_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
+ *
+ * Only characters that do not print, which are generally called zero-width.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 non-printing or zero-width character.
+ * F_false if not a UTF-8 non-printing or zero-width character.
+ *
+ * F_utf (with error bit) if unicode is an invalid Unicode character.
+ * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_zero_width_
+ extern f_status_t f_utf_character_is_zero_width(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_is_character_h
build_objects_program
build_objects_program_shared
build_objects_program_static
-build_sources_library utf.c private-utf.c utf/common.c utf/dynamic.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
+build_sources_library utf.c private-utf.c utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
build_sources_library_shared
build_sources_library_static
build_sources_object
build_sources_program
build_sources_program_shared
build_sources_program_static
-build_sources_headers utf.h utf/common.h utf/dynamic.h utf/map.h utf/string.h utf/triple.h
+build_sources_headers utf.h utf/common.h utf/convert.h utf/dynamic.h utf/is.h utf/is_character.h utf/map.h utf/string.h utf/triple.h
build_sources_headers_shared
build_sources_headers_static
build_sources_script