Update: Restructure parts of f_utf project, and use f_utf_t, and add 'u' to hexdigits...

author Kevin Day <thekevinday@gmail.com>

Mon, 28 Mar 2022 00:56:35 +0000 (19:56 -0500)

committer Kevin Day <thekevinday@gmail.com>

Mon, 28 Mar 2022 01:45:41 +0000 (20:45 -0500)
author Kevin Day <thekevinday@gmail.com>
Mon, 28 Mar 2022 00:56:35 +0000 (19:56 -0500)
committer Kevin Day <thekevinday@gmail.com>
Mon, 28 Mar 2022 01:45:41 +0000 (20:45 -0500)
diff --git a/build/level_0/settings b/build/level_0/settings

index bf18cfc13a536106d61d44f2b2eca26b3c8a1385..ccdc938895c253255094f55cec616367df673c06 100644 (file)
--- a/build/level_0/settings
+++ b/build/level_0/settings
@@ -56,7 +56,7 @@ build_sources_library status_string.c
  build_sources_library string.c private-string.c string/common.c string/private-dynamic.c string/private-map.c string/private-map_multi.c string/private-quantity.c string/private-range.c string/private-triple.c string/dynamic.c string/map.c string/map_multi.c string/quantity.c string/range.c string/static.c string/triple.c
  build_sources_library type_array/array_length.c type_array/cell.c type_array/fll_id.c type_array/int8.c type_array/int16.c type_array/int32.c type_array/int64.c type_array/int128.c type_array/state.c type_array/status.c type_array/uint8.c type_array/uint16.c type_array/uint32.c type_array/uint64.c type_array/uint128.c
  build_sources_library type_array/private-array_length.c type_array/private-cell.c type_array/private-fll_id.c type_array/private-int8.c type_array/private-int16.c type_array/private-int32.c type_array/private-int64.c type_array/private-int128.c type_array/private-state.c type_array/private-status.c type_array/private-uint8.c type_array/private-uint16.c type_array/private-uint32.c type_array/private-uint64.c type_array/private-uint128.c
-build_sources_library utf.c private-utf.c utf/common.c utf/dynamic.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
+build_sources_library utf.c private-utf.c utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
  build_sources_library-level thread.c private-thread.c
  build_sources_library_shared
  build_sources_library_static
@@ -92,7 +92,7 @@ build_sources_headers string.h string/common.h string/dynamic.h string/map.h str
  build_sources_headers type.h
  build_sources_headers type_array.h type_array/common.h
  build_sources_headers type_array/array_length.h type_array/cell.h type_array/fll_id.h type_array/int8.h type_array/int16.h type_array/int32.h type_array/int64.h type_array/int128.h type_array/state.h type_array/status.h type_array/uint8.h type_array/uint16.h type_array/uint32.h type_array/uint64.h type_array/uint128.h
-build_sources_headers utf.h utf/common.h utf/dynamic.h utf/map.h utf/string.h utf/triple.h
+build_sources_headers utf.h utf/common.h utf/convert.h utf/dynamic.h utf/is.h utf/is_character.h utf/map.h utf/string.h utf/triple.h
  build_sources_headers-level thread.h thread/common.h
  build_sources_headers_shared
  build_sources_headers_static
diff --git a/build/monolithic/settings b/build/monolithic/settings

index 6984a24aec03c541b4bac7548833082f69fdf40d..2301b60d98219fae277a803f2380c3f6c7b311e7 100644 (file)
--- a/build/monolithic/settings
+++ b/build/monolithic/settings
@@ -56,7 +56,7 @@ build_sources_library level_0/status_string.c
  build_sources_library level_0/string.c level_0/private-string.c level_0/string/common.c level_0/string/private-dynamic.c level_0/string/private-map.c level_0/string/private-map_multi.c level_0/string/private-quantity.c level_0/string/private-range.c level_0/string/private-triple.c level_0/string/dynamic.c level_0/string/map.c level_0/string/map_multi.c level_0/string/quantity.c level_0/string/range.c level_0/string/static.c level_0/string/triple.c
  build_sources_library level_0/type_array/array_length.c level_0/type_array/cell.c level_0/type_array/fll_id.c level_0/type_array/int8.c level_0/type_array/int16.c level_0/type_array/int32.c level_0/type_array/int64.c level_0/type_array/int128.c level_0/type_array/state.c level_0/type_array/status.c level_0/type_array/uint8.c level_0/type_array/uint16.c level_0/type_array/uint32.c level_0/type_array/uint64.c level_0/type_array/uint128.c
  build_sources_library level_0/type_array/private-array_length.c level_0/type_array/private-cell.c level_0/type_array/private-fll_id.c level_0/type_array/private-int8.c level_0/type_array/private-int16.c level_0/type_array/private-int32.c level_0/type_array/private-int64.c level_0/type_array/private-int128.c level_0/type_array/private-state.c level_0/type_array/private-status.c level_0/type_array/private-uint8.c level_0/type_array/private-uint16.c level_0/type_array/private-uint32.c level_0/type_array/private-uint64.c level_0/type_array/private-uint128.c
-build_sources_library level_0/utf.c level_0/private-utf.c level_0/utf/common.c level_0/utf/dynamic.c level_0/utf/map.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-is_unassigned.c level_0/utf/private-string.c
+build_sources_library level_0/utf.c level_0/private-utf.c level_0/utf/common.c level_0/utf/convert.c level_0/utf/dynamic.c level_0/utf/is.c level_0/utf/is_character.c level_0/utf/map.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-is_unassigned.c level_0/utf/private-string.c
  build_sources_library level_1/control_group.c
  build_sources_library level_1/conversion.c level_1/private-conversion.c
  build_sources_library level_1/directory.c level_1/private-directory.c
@@ -114,7 +114,7 @@ build_sources_headers level_0/string.h level_0/string/common.h level_0/string/dy
  build_sources_headers level_0/type.h
  build_sources_headers level_0/type_array.h level_0/type_array/common.h
  build_sources_headers level_0/type_array/array_length.h level_0/type_array/cell.h level_0/type_array/fll_id.h level_0/type_array/int8.h level_0/type_array/int16.h level_0/type_array/int32.h level_0/type_array/int64.h level_0/type_array/int128.h level_0/type_array/state.h level_0/type_array/status.h level_0/type_array/uint8.h level_0/type_array/uint16.h level_0/type_array/uint32.h level_0/type_array/uint64.h level_0/type_array/uint128.h
-build_sources_headers level_0/utf.h level_0/utf/common.h level_0/utf/dynamic.h level_0/utf/map.h level_0/utf/string.h level_0/utf/triple.h
+build_sources_headers level_0/utf.h level_0/utf/common.h level_0/utf/convert.h level_0/utf/dynamic.h level_0/utf/is.h level_0/utf/is_character.h level_0/utf/map.h level_0/utf/string.h level_0/utf/triple.h
  build_sources_headers level_1/control_group.h
  build_sources_headers level_1/conversion.h
  build_sources_headers level_1/directory.h level_1/directory/common.h
diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c

index 02b17deff0d7fdb1f78d4b5a37dd0313c6d48a54..dcf4344a39b80259c7005ebc741adffc5c98ef57 100644 (file)
--- a/level_0/f_utf/c/utf.c
+++ b/level_0/f_utf/c/utf.c
@@ -85,1875 +85,6 @@ extern "C" {
    }
  #endif // _di_f_utf_buffer_increment_
  
-#ifndef _di_f_utf_char_to_character_
-  f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-      if (!character_utf) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character) > width_max) {
-      return F_status_set_error(F_failure);
-    }
-
-    if (macro_f_utf_byte_width_is(*character) == 1) {
-      return F_status_set_error(F_utf_fragment);
-    }
-
-    return private_f_utf_char_to_character(character, width_max, character_utf);
-  }
-#endif // _di_f_utf_char_to_character_
-
-#ifndef _di_f_utf_character_is_
-  f_status_t f_utf_character_is(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_utf_fragment;
-      }
-
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_
-
-#ifndef _di_f_utf_character_is_alpha_
-  f_status_t f_utf_character_is_alpha(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_alpha(character);
-    }
-
-    if (isalpha(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_alpha_
-
-#ifndef _di_f_utf_character_is_alpha_digit_
-  f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_alpha_digit(character);
-    }
-
-    if (isalnum(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_alpha_digit_
-
-#ifndef _di_f_utf_character_is_alpha_numeric_
-  f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_alpha_numeric(character);
-    }
-
-    if (isalnum(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_alpha_numeric_
-
-#ifndef _di_f_utf_character_is_ascii_
-  f_status_t f_utf_character_is_ascii(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      return F_false;
-    }
-
-    return F_true;
-  }
-#endif // _di_f_utf_character_is_ascii_
-
-#ifndef _di_f_utf_character_is_combining_
-  f_status_t f_utf_character_is_combining(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_combining(character);
-    }
-
-    // There are no combining characters in ASCII.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_combining_
-
-#ifndef _di_f_utf_character_is_control_
-  f_status_t f_utf_character_is_control(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_control(character);
-    }
-
-    if (iscntrl(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_control_
-
-#ifndef _di_f_utf_character_is_control_code_
-  f_status_t f_utf_character_is_control_code(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_control_code(character);
-    }
-
-    if (iscntrl(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_control_code_
-
-#ifndef _di_f_utf_character_is_control_picture_
-  f_status_t character_is_control_format(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_control_format(character);
-    }
-
-    // There are no control format characters in ASCII.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_control_format_
-
-#ifndef _di_f_utf_character_is_control_picture_
-  f_status_t f_utf_character_is_control_picture(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_control_picture(character);
-    }
-
-    // There are no control picture characters in ASCII.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_control_picture_
-
-#ifndef _di_f_utf_character_is_digit_
-  f_status_t f_utf_character_is_digit(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_digit(character);
-    }
-
-    if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_digit_
-
-#ifndef _di_f_utf_character_is_emoji_
-  f_status_t f_utf_character_is_emoji(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_emoji(character);
-    }
-
-    if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_emoji_
-
-#ifndef _di_f_utf_character_is_fragment_
-  f_status_t f_utf_character_is_fragment(const f_utf_character_t character) {
-
-    return macro_f_utf_character_t_width_is(character) == 1;
-  }
-#endif // _di_f_utf_character_is_fragment_
-
-#ifndef _di_f_utf_character_is_graph_
-  f_status_t f_utf_character_is_graph(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      if (private_f_utf_character_is_control(character)) {
-        return F_false;
-      }
-
-      if (private_f_utf_character_is_whitespace(character)) {
-        return F_false;
-      }
-
-      if (private_f_utf_character_is_zero_width(character)) {
-        return F_false;
-      }
-
-      return F_true;
-    }
-
-    if (isgraph(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_graph_
-
-#ifndef _di_f_utf_character_is_numeric_
-  f_status_t f_utf_character_is_numeric(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_numeric(character);
-    }
-
-    if (isdigit(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_numeric_
-
-#ifndef _di_f_utf_character_is_phonetic_
-  f_status_t f_utf_character_is_phonetic(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_phonetic(character);
-    }
-
-    // There are no ASCII phonetic characters.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_phonetic_
-
-#ifndef _di_f_utf_character_is_private_
-  f_status_t f_utf_character_is_private(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_private(character);
-    }
-
-    // There are no ASCII private characters.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_phonetic_
-
-#ifndef _di_f_utf_character_is_punctuation_
-  f_status_t f_utf_character_is_punctuation(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_punctuation(character);
-    }
-
-    // ASCII: '!' to '#'.
-    if (character > 0x20000000 && character < 0x24000000) {
-      return F_true;
-    }
-
-    // ASCII: '%' to '*'.
-    if (character > 0x24000000 && character < 0x2b000000) {
-      return F_true;
-    }
-
-    // ASCII: ',' to '/'.
-    if (character > 0x2b000000 && character < 0x30000000) {
-      return F_true;
-    }
-
-    // ASCII: ':', ';', '?', or '@'.
-    if (character == 0x3a000000 || character == 0x3b000000 || character == 0x3f000000 || character == 0x40000000) {
-      return F_true;
-    }
-
-    // ASCII: '[' to ']'.
-    if (character > 0x5a000000 && character < 0x5d000000) {
-      return F_true;
-    }
-
-    // ASCII: '_', '{', or '}'.
-    if (character == 0x5f000000 || character == 0x7b000000 || character == 0x7d000000) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_punctuation_
-
-#ifndef _di_f_utf_character_is_symbol_
-  f_status_t f_utf_character_is_symbol(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_symbol(character);
-    }
-
-    // ASCII: '$' or '+'.
-    if (character == 0x24000000 || character == 0x2b000000) {
-      return F_true;
-    }
-
-    // ASCII: '<' to '>'.
-    if (character > 0x3c000000 && character < 0x3e000000) {
-      return F_true;
-    }
-
-    // ASCII: '^', '`', '|', or '~'.
-    if (character == 0x5e000000 || character == 0x60000000 || character == 0x7c000000 || character == 0x7e000000) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_symbol_
-
-#ifndef _di_f_utf_character_is_unassigned_
-  f_status_t f_utf_character_is_unassigned(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_unassigned(character);
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_unassigned_
-
-#ifndef _di_f_utf_character_is_valid_
-  f_status_t f_utf_character_is_valid(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_valid(character);
-    }
-
-    return F_true;
-  }
-#endif // _di_f_utf_character_is_valid_
-
-#ifndef _di_f_utf_character_is_whitespace_
-  f_status_t f_utf_character_is_whitespace(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_whitespace(character);
-    }
-
-    if (isspace(macro_f_utf_character_t_to_char_1(character))) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_whitespace_
-
-#ifndef _di_f_utf_character_is_whitespace_modifier_
-  f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_whitespace_modifier(character);
-    }
-
-    // There are no ASCII whitespace modifiers.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_whitespace_modifier_
-
-#ifndef _di_f_utf_character_is_whitespace_other_
-  f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_whitespace_other(character);
-    }
-
-    // There are no ASCII whitespace other.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_whitespace_other_
-
-#ifndef _di_f_utf_character_is_wide_
-  f_status_t f_utf_character_is_wide(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_wide(character);
-    }
-
-    // There are no wide ASCII characters.
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_wide_
-
-#ifndef _di_f_utf_character_is_word_
-  f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_word(character, strict);
-    }
-
-    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_word_
-
-#ifndef _di_f_utf_character_is_word_dash_
-  f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_word_dash(character, strict);
-    }
-
-    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0] || character == f_string_ascii_minus_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_word_dash_
-
-#ifndef _di_f_utf_character_is_word_dash_plus_
-  f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_word_dash_plus(character, strict);
-    }
-
-    if (isalnum(macro_f_utf_character_t_to_char_1(character)) || character == f_string_ascii_underscore_s.string[0] || character == f_string_ascii_minus_s.string[0] || character == f_string_ascii_plus_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_word_dash_plus_
-
-#ifndef _di_f_utf_character_is_zero_width_
-  f_status_t f_utf_character_is_zero_width(const f_utf_character_t character) {
-
-    if (macro_f_utf_character_t_width_is(character)) {
-      if (macro_f_utf_character_t_width_is(character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return private_f_utf_character_is_zero_width(character);
-    }
-
-    const uint8_t ascii = macro_f_utf_character_t_to_char_1(character);
-
-    // These control characters are considered zero-width spaces.
-    if (ascii >= 0x00 && ascii <= 0x08) {
-      return F_true;
-    }
-    else if (ascii == 0x0a) {
-      return F_true;
-    }
-    else if (ascii >= 0x0c && ascii <= 0x1f) {
-      return F_true;
-    }
-    else if (ascii == 0x7f) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_character_is_zero_width_
-
-#ifndef _di_f_utf_character_to_char_
-  f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!utf_character) return F_status_set_error(F_parameter);
-      if (!character) return F_status_set_error(F_parameter);
-      if (!width_max) return F_status_set_error(F_parameter);
-      if (!*width_max) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_character_t_width_is(utf_character)) {
-      if (macro_f_utf_character_t_width_is(utf_character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      #if __BYTE_ORDER == __LITTLE_ENDIAN
-        uint32_t utf = 0;
-
-        switch (macro_f_utf_character_t_width_is(utf_character)) {
-          case 1:
-            utf = macro_f_utf_character_t_to_char_1(utf_character) << 24;
-            break;
-          case 2:
-            utf = (macro_f_utf_character_t_to_char_2(utf_character) << 24) | (macro_f_utf_character_t_to_char_1(utf_character) << 16);
-            break;
-          case 3:
-            utf = (macro_f_utf_character_t_to_char_3(utf_character) << 24) | (macro_f_utf_character_t_to_char_2(utf_character) << 16) | (macro_f_utf_character_t_to_char_1(utf_character) << 8);
-            break;
-          case 4:
-            utf = (macro_f_utf_character_t_to_char_4(utf_character) << 24) | (macro_f_utf_character_t_to_char_3(utf_character) << 16) | (macro_f_utf_character_t_to_char_2(utf_character) << 8) | macro_f_utf_character_t_to_char_1(utf_character);
-            break;
-          default:
-            return F_status_set_error(F_failure);
-        }
-
-        memcpy(*character, &utf, sizeof(f_char_t) * macro_f_utf_character_t_width_is(utf_character));
-      #else
-        memcpy(*character, &utf_character, sizeof(f_char_t) * macro_f_utf_character_t_width_is(utf_character));
-      #endif // __BYTE_ORDER == __LITTLE_ENDIAN
-
-      return F_none;
-    }
-
-    #if __BYTE_ORDER == __LITTLE_ENDIAN
-      uint32_t utf = macro_f_utf_character_t_to_char_1(utf_character) << 24;
-
-      memcpy(*character, &utf, sizeof(f_char_t));
-    #else
-      memcpy(*character, &utf_character, sizeof(f_char_t));
-    #endif // __BYTE_ORDER == __LITTLE_ENDIAN
-
-    return F_none;
-  }
-#endif // _di_f_utf_character_to_char_
-
-#ifndef _di_f_utf_character_unicode_to_
-  f_status_t f_utf_character_unicode_to(const f_utf_character_t character, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    return private_f_utf_character_unicode_to(character, unicode);
-  }
-#endif // _di_f_utf_character_unicode_to_
-
-#ifndef _di_f_utf_character_unicode_from_
-  f_status_t f_utf_character_unicode_from(const uint32_t unicode, f_utf_character_t *character) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!character) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (unicode > 0x10ffff) {
-      return F_status_set_error(F_utf);
-    }
-
-    // U+0000 -> U+007F.
-    if (unicode < 0x80) {
-      *character = unicode;
-    }
-
-    // U+0080 -> U+07FF.
-    else if (unicode < 0x800) {
-      *character = (unicode & 0x7c0) << 2;
-      *character |= unicode & 0x3f;
-      *character |= 0xc080;
-    }
-
-    // U+0800 -> U+FFFF.
-    else if (unicode < 0x10000) {
-      *character = (unicode & 0xf000) << 4;
-      *character |= (unicode & 0xfc0) << 2;
-      *character |= unicode & 0x3f;
-      *character |= 0xe08080;
-    }
-
-    // U+100000 -> U+10FFFF.
-    else {
-      *character = (unicode & 0x1c0000) << 6;
-      *character |= (unicode & 0x3f000) << 4;
-      *character |= (unicode & 0xfc0) << 2;
-      *character |= unicode & 0x3f;
-      *character |= 0xe0808080;
-    }
-
-    return F_none;
-  }
-#endif // _di_f_utf_character_unicode_from_
-
-#ifndef _di_f_utf_character_unicode_string_to_
-  f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!string) return F_status_set_error(F_parameter);
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    f_array_length_t i = 0;
-
-    while (i < length && !string[i]) {
-      ++i;
-    } // while
-
-    if (i < length) {
-      if (macro_f_utf_character_t_width_is(string[i])) {
-        i = length;
-      }
-      else {
-        if (macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_u_s.string[0] || macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_U_s.string[0]) {
-          do {
-            ++i;
-          } while (i < length && !string[i]);
-
-          if (i < length && !macro_f_utf_character_t_width_is(string[i]) && macro_f_utf_character_t_to_char_1(string[i]) == f_string_ascii_plus_s.string[0]) {
-            ++i;
-          }
-          else {
-            i = length;
-          }
-        }
-        else {
-          i = length;
-        }
-      }
-    }
-
-    if (i == length) {
-      return F_status_set_error(F_valid_not);
-    }
-
-    uint32_t value = 0;
-    uint8_t character = 0;
-
-    for (; i < length; ++i) {
-
-      if (!string[i]) continue;
-
-      // Only ASCII character numbers are allowed to represent
-      if (macro_f_utf_character_t_width_is(string[i])) {
-        return F_status_set_error(F_valid_not);
-      }
-
-      value *= 16;
-      character = macro_f_utf_character_t_to_char_1(string[i]);
-
-      if (character > 0x2f && character < 0x3a) {
-        value += character - 0x30;
-      }
-      else if (character > 0x40 && character < 0x47) {
-        value += (character - 0x41) + 10;
-      }
-      else if (character > 0x60 && character < 0x67) {
-        value += (character - 0x61) + 10;
-      }
-      else {
-        return F_status_set_error(F_valid_not);
-      }
-    } // for
-
-    *unicode = value;
-
-    return F_none;
-  }
-#endif // _di_f_utf_character_unicode_string_to_
-
-#ifndef _di_f_utf_is_
-  f_status_t f_utf_is(const f_string_t character) {
-
-    return macro_f_utf_byte_width_is(*character);
-  }
-#endif // _di_f_utf_is_
-
-#ifndef _di_f_utf_is_alpha_
-  f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_alpha(character_utf);
-    }
-
-    if (isalpha(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_alpha_
-
-#ifndef _di_f_utf_is_alpha_digit_
-  f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_alpha_digit(character_utf);
-    }
-
-    if (isalnum(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_alpha_digit_
-
-#ifndef _di_f_utf_is_alpha_numeric_
-  f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_alpha_numeric(character_utf);
-    }
-
-    if (isalnum(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_alpha_numeric_
-
-#ifndef _di_f_utf_is_ascii_
-  f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      return F_false;
-    }
-
-    return F_true;
-  }
-#endif // _di_f_utf_is_ascii_
-
-#ifndef _di_f_utf_is_combining_
-  f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_combining(character_utf);
-    }
-
-    // There are no ASCII combining characters.
-    return F_false;
-  }
-#endif // _di_f_utf_is_combining_
-
-#ifndef _di_f_utf_is_control_
-  f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control(character_utf);
-    }
-
-    return iscntrl(*character);
-  }
-#endif // _di_f_utf_is_control_
-
-#ifndef _di_f_utf_is_control_code
-  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control_code(character_utf);
-    }
-
-    if (iscntrl(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_control_code_
-
-#ifndef _di_f_utf_is_control_format_
-  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control_format(character_utf);
-    }
-
-    // There are no ASCII control formats.
-    return F_false;
-  }
-#endif // _di_f_utf_is_control_format_
-
-#ifndef _di_f_utf_is_control_picture_
-  f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) != 3) {
-        return F_false;
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_control_picture(character_utf);
-    }
-
-    // There are no ASCII control pictures.
-    return F_false;
-  }
-#endif // _di_f_utf_is_control_picture_
-
-#ifndef _di_f_utf_is_digit_
-  f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_digit(character_utf);
-    }
-
-    if (isdigit(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_digit_
-
-#ifndef _di_f_utf_is_emoji_
-  f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_emoji(character_utf);
-    }
-
-    if (isdigit(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_emoji_
-
-#ifndef _di_f_utf_is_fragment_
-  f_status_t f_utf_is_fragment(const f_string_t character) {
-
-    if (macro_f_utf_byte_width_is(*character) == 1) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_fragment_
-
-#ifndef _di_f_utf_is_graph_
-  f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      if (private_f_utf_character_is_control(character_utf)) {
-        return F_false;
-      }
-
-      if (private_f_utf_character_is_whitespace(character_utf)) {
-        return F_false;
-      }
-
-      // Zero-width characters are be treated as a non-graph.
-      if (private_f_utf_character_is_zero_width(character_utf)) {
-        return F_false;
-      }
-
-      return F_true;
-    }
-
-    if (isgraph(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_graph_
-
-#ifndef _di_f_utf_is_numeric_
-  f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_numeric(character_utf);
-    }
-
-    if (isdigit(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_numeric_
-
-#ifndef _di_f_utf_is_phonetic_
-  f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_phonetic(character_utf);
-    }
-
-    // There are no ASCII phonetic characters.
-    return F_false;
-  }
-#endif // _di_f_utf_is_phonetic_
-
-#ifndef _di_f_utf_is_private_
-  f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_private(character_utf);
-    }
-
-    // There are no ASCII private characters.
-    return F_false;
-  }
-#endif // _di_f_utf_is_private_
-
-#ifndef _di_f_utf_is_punctuation_
-  f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_punctuation(character_utf);
-    }
-
-    // ASCII: '!' to '#'.
-    if (character[0] > 0x20 && character[0] < 0x24) {
-      return F_true;
-    }
-
-    // ASCII: '%' to '*'.
-    if (character[0] > 0x24 && character[0] < 0x2b) {
-      return F_true;
-    }
-
-    // ASCII: ',' to '/'.
-    if (character[0] > 0x2b && character[0] < 0x30) {
-      return F_true;
-    }
-
-    // ASCII: ':', ';', '?', or '@'.
-    if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
-      return F_true;
-    }
-
-    // ASCII: '[' to ']'.
-    if (character[0] > 0x5a && character[0] < 0x5d) {
-      return F_true;
-    }
-
-    // ASCII: '_', '{', or '}'.
-    if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_punctuation_
-
-#ifndef _di_f_utf_is_symbol_
-  f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_symbol(character_utf);
-    }
-
-    // ASCII: '$' or '+'.
-    if (character[0] == 0x24 || character[0] == 0x2b) {
-      return F_true;
-    }
-
-    // ASCII: '<' to '>'.
-    if (character[0] > 0x3c && character[0] < 0x3e) {
-      return F_true;
-    }
-
-    // ASCII: '^', '`', '|', or '~'.
-    if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_symbol_
-
-#ifndef _di_f_utf_is_surrogate_
-  f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_surrogate(character_utf);
-    }
-
-    // ASCII are never surrogate.
-    return F_false;
-  }
-#endif // _di_f_utf_is_surrogate_
-
-#ifndef _di_f_utf_is_unassigned_
-  f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_unassigned(character_utf);
-    }
-
-    // ASCII are never unassigned.
-    return F_false;
-  }
-#endif // _di_f_utf_is_unassigned_
-
-#ifndef _di_f_utf_is_valid_
-  f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_valid(character_utf);
-    }
-
-    // ASCII are valid.
-    return F_true;
-  }
-#endif // _di_f_utf_is_valid_
-
-#ifndef _di_f_utf_is_whitespace_
-  f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_whitespace(character_utf);
-    }
-
-    if (isspace(*character)) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_whitespace_
-
-#ifndef _di_f_utf_is_whitespace_modifier_
-  f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_whitespace_modifier(character_utf);
-    }
-
-    // There are no ASCII whitespace modifiers.
-    return F_false;
-  }
-#endif // _di_f_utf_is_whitespace_modifier_
-
-#ifndef _di_f_utf_is_whitespace_other_
-  f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_whitespace_other(character_utf);
-    }
-
-    // There are no ASCII whitespace other.
-    return F_false;
-  }
-#endif // _di_f_utf_is_whitespace_other_
-
-#ifndef _di_f_utf_is_wide_
-  f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_wide(character_utf);
-    }
-
-    // There are no wide ASCII characters.
-    return F_false;
-  }
-#endif // _di_f_utf_is_wide_
-
-#ifndef _di_f_utf_is_word_
-  f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_word(character_utf, strict);
-    }
-
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_word_
-
-#ifndef _di_f_utf_is_word_dash_
-  f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_word_dash(character_utf, strict);
-    }
-
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_word_dash_
-
-#ifndef _di_f_utf_is_word_dash_plus_
-  f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_word_dash_plus(character_utf, strict);
-    }
-
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0] || *character == f_string_ascii_plus_s.string[0]) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_word_dash_plus_
-
-#ifndef _di_f_utf_is_zero_width_
-  f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
-      }
-
-      if (macro_f_utf_byte_width_is(*character) == 1) {
-        return F_status_set_error(F_utf_fragment);
-      }
-
-      f_utf_character_t character_utf = 0;
-
-      {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-        if (F_status_is_error(status)) return status;
-      }
-
-      return private_f_utf_character_is_zero_width(character_utf);
-    }
-
-    // These control characters are considered zero-width spaces.
-    if (*character >= 0x00 && *character <= 0x08) {
-      return F_true;
-    }
-    else if (*character >= 0x0c && *character <= 0x1f) {
-      return F_true;
-    }
-    else if (*character == 0x7f) {
-      return F_true;
-    }
-
-    return F_false;
-  }
-#endif // _di_f_utf_is_zero_width_
-
-#ifndef _di_f_utf_unicode_from_
-  f_status_t f_utf_unicode_from(const uint32_t unicode, const f_array_length_t width_max, f_string_t *character) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    // @fixme the code here needs to be reviewed for endianess accuracy for both big and little endian.
-    if (unicode > 0x10ffff) {
-      return F_status_set_error(F_utf);
-    }
-
-    if (unicode < 0x80) {
-
-      // U+0000 -> U+007F
-      (*character)[0] = (uint8_t) unicode;
-
-      if (width_max > 1) {
-        (*character)[1] = 0;
-
-        if (width_max > 2) {
-          (*character)[2] = 0;
-
-          if (width_max > 3) {
-            (*character)[3] = 0;
-          }
-        }
-      }
-    }
-    else if (unicode < 0x800) {
-      if (width_max < 2) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+0080 -> U+07FF
-      (*character)[0] = F_utf_byte_2_d | ((uint8_t) ((unicode & 0x7c0) >> 6));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-
-      if (width_max > 2) {
-        (*character)[2] = 0;
-
-        if (width_max > 2) {
-          (*character)[2] = 0;
-        }
-      }
-    }
-    else if (unicode < 0x10000) {
-      if (width_max < 3) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+0800 -> U+FFFF
-      (*character)[0] = F_utf_byte_3_d | ((uint8_t) ((unicode & 0xf000) >> 12));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
-      (*character)[2] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-
-      if (width_max > 3) {
-        character[3] = 0;
-      }
-    }
-    else {
-      if (width_max < 4) {
-        return F_status_set_error(F_utf);
-      }
-
-      // U+10000 -> U+10FFFF
-      (*character)[0] = F_utf_byte_4_d | ((uint8_t) ((unicode & 0x1c0000) >> 18));
-      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0x3f000) >> 12));
-      (*character)[2] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
-      (*character)[3] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
-    }
-
-    return F_none;
-  }
-#endif // _di_f_utf_unicode_from_
-
-#ifndef _di_f_utf_unicode_to_
-  f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (width_max < 1) return F_status_set_error(F_parameter);
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    f_utf_character_t character_utf = 0;
-
-    {
-      const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
-      if (F_status_is_error(status)) return status;
-    }
-
-    return private_f_utf_character_unicode_to(character_utf, unicode);
-  }
-#endif // _di_f_utf_unicode_to_
-
-#ifndef _di_f_utf_unicode_string_to_f_
-  f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode) {
-    #ifndef _di_level_0_parameter_checking_
-      if (!unicode) return F_status_set_error(F_parameter);
-    #endif // _di_level_0_parameter_checking_
-
-    f_array_length_t i = 0;
-
-    while (i < length && !string[i]) {
-      ++i;
-    } // while
-
-    if (i < length) {
-      if (string[i] == f_string_ascii_u_s.string[0] || string[i] == f_string_ascii_U_s.string[0]) {
-        do {
-          ++i;
-        } while (i < length && !string[i]);
-
-        if (i < length && string[i] == f_string_ascii_plus_s.string[0]) {
-          ++i;
-        }
-        else {
-          i = length;
-        }
-      }
-      else {
-        i = length;
-      }
-    }
-
-    if (i == length) {
-      return F_status_set_error(F_valid_not);
-    }
-
-    uint32_t value = 0;
-
-    for (; i < length; ++i) {
-
-      if (!string[i]) continue;
-
-      value *= 16;
-
-      if (string[i] > 0x2f && string[i] < 0x3a) {
-        value += string[i] - 0x30;
-      }
-      else if (string[i] > 0x40 && string[i] < 0x47) {
-        value += (string[i] - 0x41) + 10;
-      }
-      else if (string[i] > 0x60 && string[i] < 0x67) {
-        value += (string[i] - 0x61) + 10;
-      }
-      else {
-        return F_status_set_error(F_valid_not);
-      }
-    } // for
-
-    if (value > 0x10ffff) {
-      return F_status_set_error(F_valid_not);
-    }
-
-    *unicode = value;
-
-    return F_none;
-  }
-#endif // _di_f_utf_unicode_string_to_
-
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h

index 3c14279ba075c5b51dfdfcf582f059dcfff76e45..87061afe434930c8a52500778f7ca10751538782 100644 (file)
--- a/level_0/f_utf/c/utf.h
+++ b/level_0/f_utf/c/utf.h
@@ -47,7 +47,10 @@
  
  // FLL-0 utf includes.
  #include <fll/level_0/utf/common.h>
+#include <fll/level_0/utf/convert.h>
  #include <fll/level_0/utf/dynamic.h>
+#include <fll/level_0/utf/is.h>
+#include <fll/level_0/utf/is_character.h>
  #include <fll/level_0/utf/map.h>
  #include <fll/level_0/utf/string.h>
  #include <fll/level_0/utf/triple.h>
@@ -121,1605 +124,6 @@ extern "C" {
    extern f_status_t f_utf_buffer_increment(const f_string_static_t buffer, f_string_range_t *range, const f_array_length_t step);
  #endif // _di_f_utf_buffer_increment_
  
-/**
- * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
- *
- * This does not validate if the UTF-8 character is a valid UTF-8 character, for that use f_utf_character_is_valid().
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 character.
- *   F_false if not a UTF-8 character.
- *   F_utf_fragment if this is a UTF-8 character fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_is_
-  extern f_status_t f_utf_character_is(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 alphabet character.
- *   F_false if not a UTF-8 alphabet character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalpha()
- */
-#ifndef _di_f_utf_character_is_alpha_
-  extern f_status_t f_utf_character_is_alpha(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 alpha-digit character.
- *   F_false if not a UTF-8 alpha-digit character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_alpha_digit_
-  extern f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 alpha-numeric character.
- *   F_false if not a UTF-8 alpha-numeric character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_alpha_numeric_
-  extern f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_alpha_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII character.
- *
- * This does not validate whether the UTF-8 character is valid or not.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if an ASCII character.
- *   F_false if not an ASCII character.
- */
-#ifndef _di_f_utf_character_is_ascii_
-  extern f_status_t f_utf_character_is_ascii(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_ascii_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 combining character.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 combining character.
- *   F_false if not a UTF-8 combining character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_combining_
-  extern f_status_t f_utf_character_is_combining(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_combining_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
- *
- * This includes control code and control format characters.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 control character.
- *   F_false if not a UTF-8 control character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_character_is_control_
-  extern f_status_t f_utf_character_is_control(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
- *
- * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 control code character.
- *   F_false if not a UTF-8 control code character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_character_is_control_code_
-  extern f_status_t f_utf_character_is_control_code(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_code_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control format character.
- *
- * Control Format characters are special characters used for formatting.
- * These are considered control characters.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 control format character.
- *   F_false if not a UTF-8 control format character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_control_format_
-  extern f_status_t f_utf_character_is_control_format(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_format_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control picture character.
- *
- * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 control picture character.
- *   F_false if not a UTF-8 control picture character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_control_picture_
-  extern f_status_t f_utf_character_is_control_picture(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_control_picture_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 digit character.
- *   F_false if not a UTF-8 digit character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_character_is_digit_
-  extern f_status_t f_utf_character_is_digit(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 emoji character.
- *   F_false if not a UTF-8 emoji character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_emoji_
-  extern f_status_t f_utf_character_is_emoji(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_emoji_
-
-/**
- * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
- *
- * Characters whose width is 1-byte are invalid.
- * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
- *
- * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
- *
- * According to rfc3629, the valid octet sequences for UTF-8 are:
- *   UTF8-octets = *( UTF8-char )
- *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
- *   UTF8-1      = %x00-7F
- *   UTF8-2      = %xC2-DF UTF8-tail
- *   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
- *                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
- *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
- *                 %xF4 %x80-8F 2( UTF8-tail )
- *   UTF8-tail   = %x80-BF
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 character.
- *   F_false if not a UTF-8 character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_is_fragment_
-  extern f_status_t f_utf_character_is_fragment(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_fragment_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 graph.
- *   F_false if not a UTF-8 graph.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isgraph()
- */
-#ifndef _di_f_utf_character_is_graph_
-  extern f_status_t f_utf_character_is_graph(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_graph_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 numeric character.
- *   F_false if not a UTF-8 numeric character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_character_is_numeric_
-  extern f_status_t f_utf_character_is_numeric(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 phonetic character.
- *   F_false if not a UTF-8 phonetic character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_phonetic_
-  extern f_status_t f_utf_character_is_phonetic(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_phonetic_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 private character.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 private character.
- *   F_false if not a UTF-8 private character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_private_
-  extern f_status_t f_utf_character_is_private(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_private_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 punctuation character.
- *   F_false if not a UTF-8 punctuation character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_punctuation_
-  extern f_status_t f_utf_character_is_punctuation(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_punctuation_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 symbol character.
- *   F_false if not a UTF-8 symbol character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_symbol_
-  extern f_status_t f_utf_character_is_symbol(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_symbol_
-
-/**
- * Check to see if the entire byte block of the character is an unassigned (well-formed) UTF-8 character.
- *
- * The Surrogates and Private Use are not considered unassigned.
- *
- * This does validate if the UTF-8 character is an unassigned UTF-8 character.
- * To not do this, use f_utf_character_is().
- *
- * @param character
- *   The character to check.
- *
- * @return
- *   F_true if a UTF-8 unassigned character.
- *   F_false if not a UTF-8 unassigned character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_fragment()
- */
-#ifndef _di_f_utf_character_is_unassigned_
-  extern f_status_t f_utf_character_is_unassigned(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_unassigned_
-
-/**
- * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
- *
- * This does validate if the UTF-8 character is a valid UTF-8 character.
- * To not do this, use f_utf_character_is().
- *
- * ASCII character codes are considered valid by this function.
- *
- * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 character.
- *   F_false if not a UTF-8 character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is()
- * @see f_utf_character_is_fragment()
- */
-#ifndef _di_f_utf_character_is_valid_
-  extern f_status_t f_utf_character_is_valid(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_valid_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
- *
- * Non-printing or zero-width characters are not considered whitespace.
- * This does include line separators like '\n'.
- * This does not include phonetic spaces, like whitespace modifiers.
- * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * However, because they are not rendered as whitespace, they are technically not white space.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 whitespace.
- *   F_false if not a UTF-8 whitespace.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_character_is_whitespace_
-  extern f_status_t f_utf_character_is_whitespace(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 whitespace modifier character.
- *
- * These are phonetic spaces.
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 modifier character.
- *   F_false if not a UTF-8 modifier character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_whitespace_modifier_
-  extern f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_modifier_
-
-/**
- * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
- *
- * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 (other) whitespace.
- *   F_false if not a UTF-8 (other) whitespace.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_character_is_whitespace_other_
-  extern f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_whitespace_other_
-
-/**
- * Get whether or not the UTF-8 character is a wide character on display.
- *
- * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
- * Instead, this is the width in characters on the screen the character takes up.
- * The "wide" characters take up 2 characters on render.
- * The "narrow" characters take up 1 character on render.
- *
- * @param character
- *   The (UTF-8) character.
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_wide_
-  extern f_status_t f_utf_character_is_wide(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_wide_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
- *
- * A word character is alpha-numeric or an underscore '_'.
- *
- * @param character
- *   The character to validate.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word character.
- *   F_false if not a UTF-8 word character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_
-  extern f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
- *
- * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * @param character
- *   The character to validate.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word or dash character.
- *   F_false if not a UTF-8 word or dash character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_dash_
-  extern f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_dash_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
- *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
- *
- * @param character
- *   The character to validate.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word, dash, or plus character.
- *   F_false if not a UTF-8 word, dash, or plus character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_character_is_word_dash_plus_
-  extern f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict);
-#endif // _di_f_utf_character_is_word_dash_plus_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
- *
- * Only characters that do not print, which are generally called zero-width.
- *
- * @param character
- *   The character to validate.
- *
- * @return
- *   F_true if a UTF-8 non-printing or zero-width character.
- *   F_false if not a UTF-8 non-printing or zero-width character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_is_zero_width_
-  extern f_status_t f_utf_character_is_zero_width(const f_utf_character_t character);
-#endif // _di_f_utf_character_is_zero_width_
-
-/**
- * Convert a specialized f_utf_character_t type to a uint8_t, stored as a string (character buffer).
- *
- * This will also convert ASCII characters stored in the utf_character array.
- * This will not resize character.
- *
- * @param utf_character
- *   The UTF-8 character to convert from.
- * @param character
- *   A uint8_t representation of the UTF-8 character, stored as a string of width bytes.
- *   If width_max is 0, then this should be set to 0.
- * @param width_max
- *   This is set to the max number of bytes available.
- *   This is then updated to represent the max bytes used if enough space is available.
- *
- * @return
- *   F_none if conversion was successful.
- *
- *   F_failure (with error bit) if width is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_to_char_
-  extern f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max);
-#endif // _di_f_utf_character_to_char_
-
-/**
- * Convert a given (UTF-8) character into Unicode.
- *
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
- * The Unicode does not need to be interpreted like UTF-8, it is simply a sequence of numbers from 0 up to the max supported Unicode integer value (U+10FFFF).
- *
- * @param character
- *   The (UTF-8) character.
- * @param unicode
- *   The Unicode number.
- *
- * @return
- *   F_none on success.
- *
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_character_unicode_to_
-  extern f_status_t f_utf_character_unicode_to(const f_utf_character_t character, uint32_t *unicode);
-#endif // _di_f_utf_character_unicode_to_
-
-/**
- * Convert a given Unicode into (UTF-8) character.
- *
- * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
- * The Unicode does not need to be interpreted like UTF-8, it is simply a sequence of numbers from 0 up to the max supported Unicode integer value (U+10FFFF).
- *
- * @param unicode
- *   The Unicode number.
- * @param character
- *   The (UTF-8) character.
- *
- * @return
- *   F_none on success.
- *
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_character_unicode_from_
-  extern f_status_t f_utf_character_unicode_from(const uint32_t unicode, f_utf_character_t *character);
-#endif // _di_f_utf_character_unicode_from_
-
-/**
- * Convert a string of the format "U+FFFF" into the codepoint value.
- *
- * This ignores NULL characters.
- * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
- * The "U+" prefix is optional.
- * Only ASCII characters are allowed to represent the Unicode sequence string.
- *
- * @param string
- *   The string representing a Unicode sequence.
- * @param length
- *   The maximum number of characters.
- * @param unicode
- *   A 32-bit integer representing the Unicode (such as U+0001).
- *   Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width_max is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_valid_not (with error bit) if string is not a valid Unicode string.
- */
-#ifndef _di_f_utf_character_unicode_string_to_
-  extern f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, uint32_t *unicode);
-#endif // _di_f_utf_character_unicode_string_to_
-
-/**
- * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
- *
- * This does not check the validity of the character, for that instead use f_utf_is_valid().
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- *
- * @return
- *   F_true if a UTF-8 character.
- *   F_false if not a UTF-8 character.
- */
-#ifndef _di_f_utf_is_
-  extern f_status_t f_utf_is(const f_string_t character);
-#endif // _di_f_utf_is_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 alphabet character.
- *   F_false if not a UTF-8 alphabet character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalpha()
- */
-#ifndef _di_f_utf_is_alpha_
-  extern f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
- *
- * Digit characters are decimal digits and letter numbers.
- *
- * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 alpha-digit character.
- *   F_false if not a UTF-8 alpha-digit character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_alpha_digit_
-  extern f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 alpha-numeric character.
- *   F_false if not a UTF-8 alpha-numeric character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_alpha_numeric_
-  extern f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_alpha_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if an ASCII character.
- *   F_false if not an ASCII character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_ascii_
-  extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_ascii_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 combining character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 combining character.
- *   F_false if not a UTF-8 combining character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_combining_
-  extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_combining_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
- *
- * This includes control code and control format characters.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 control character.
- *   F_false if not a UTF-8 control character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
- */
-#ifndef _di_f_utf_is_control_
-  extern f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control code character.
- *
- * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 control code character.
- *   F_false if not a UTF-8 control code character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_code_
-  extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_code_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control format character.
- *
- * Control Format characters are special characters used for formatting.
- * These are considered control characters.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 control format character.
- *   F_false if not a UTF-8 control format character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_format_
-  extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_format_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 control picture character.
- *
- * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 control picture character.
- *   F_false if not a UTF-8 control picture character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_control_picture_
-  extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_control_picture_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 digit character.
- *   F_false if not a UTF-8 digit character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_is_digit_
-  extern f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_digit_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 emoji character.
- *   F_false if not a UTF-8 emoji character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_emoji_
-  extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_emoji_
-
-/**
- * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
- *
- * Characters whose width is 1-byte are invalid.
- * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
- *
- * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
- *
- * According to RFC 3629, the valid octet sequences for UTF-8 are:
- *   UTF8-octets = *( UTF8-char )
- *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
- *   UTF8-1      = %x00-7F
- *   UTF8-2      = %xC2-DF UTF8-tail
- *   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
- *                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
- *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
- *                 %xF4 %x80-8F 2( UTF8-tail )
- *   UTF8-tail   = %x80-BF
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- *
- * @return
- *   F_true if a UTF-8 character fragment.
- *   F_false if not a UTF-8 character fragment.
- */
-#ifndef _di_f_utf_is_fragment_
-  extern f_status_t f_utf_is_fragment(const f_string_t character);
-#endif // _di_f_utf_is_fragment_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 graph.
- *   F_false if not a UTF-8 graph.
- *
- *   F_maybe (with error bit) if this could be a graph but width is not long enough.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isgraph()
- */
-#ifndef _di_f_utf_is_graph_
-  extern f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_graph_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
- *
- * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 numeric character.
- *   F_false if not a UTF-8 numeric character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isdigit()
- */
-#ifndef _di_f_utf_is_numeric_
-  extern f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_numeric_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 phonetic character.
- *   F_false if not a UTF-8 phonetic character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_phonetic_
-  extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_phonetic_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 private character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 private character.
- *   F_false if not a UTF-8 private character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_private_
-  extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_private_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 punctuation character.
- *   F_false if not a UTF-8 punctuation character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_punctuation_
-  extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_punctuation_
-
-/**
- * Check to see if the entire byte block of the character is a surrogate UTF-8 character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 surrogate character.
- *   F_false if not a UTF-8 surrogate character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_surrogate_
-  extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_surrogate_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 symbol character.
- *   F_false if not a UTF-8 symbol character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_symbol_
-  extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_symbol_
-
-/**
- * Check to see if the entire byte block of the character is an unassigned UTF-8 character.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if an unassigned UTF-8 character.
- *   F_false if not an unassigned UTF-8 character.
- *
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_unassigned_
-  extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_unassigned_
-
-/**
- * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
- *
- * This does validate if the UTF-8 character is a valid UTF-8 character.
- * To not do this, use f_utf_is().
- *
- * Valid ASCII character codes are considered valid by this function.
- *
- * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a valid UTF-8 character or is an ASCII character.
- *   F_false if not a valid UTF-8 character.
- *
- *   F_failure (with error bit) if width_max is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_valid_
-  extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_valid_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
- *
- * Non-printing or zero-width characters are not considered whitespace.
- * This does include line separators like '\n'.
- * This does not include phonetic spaces, like whitespace modifiers.
- * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * However, because they are not rendered as whitespace, they are technically not white space.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 whitespace.
- *   F_false if not a UTF-8 whitespace.
- *
- *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isspace()
- */
-#ifndef _di_f_utf_is_whitespace_
-  extern f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace modifier character.
- *
- * These are phonetic spaces.
- *
- * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
- * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 whitespace.
- *   F_false if not a UTF-8 whitespace.
- *
- *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_whitespace_modifier_
-  extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_modifier_
-
-/**
- * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
- *
- * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 whitespace.
- *   F_false if not a UTF-8 whitespace.
- *
- *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_whitespace_other_
-  extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_whitespace_other_
-
-/**
- * Get whether or not the UTF-8 character is a wide character on display.
- *
- * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
- * Instead, this is the width in characters on the screen the character takes up.
- * "Wide" characters take up 2 characters on render.
- * "Narrow" characters take up 1 character on render.
- *
- * @param character
- *   The (UTF-8) character.
- * @param width_max
- *   The max width available for representing the UTF-8 character.
- *   There must be enough space in the character buffer to handle the Unicode width.
- *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- *   This is the width in bytes the codepoint takes up in UTF-8.
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width_max is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_wide_
-  extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_wide_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
- *
- * A word character is alpha-digit or an underscore '_'.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word character.
- *   F_false if not a UTF-8 word character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_
-  extern f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
- *
- * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word or dash character.
- *   F_false if not a UTF-8 word or dash character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_dash_
-  extern f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_dash_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
- *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
- *
- * Unicode appears to refer to dashes that connect words as a hyphen.
- * Therefore, only these hyphens are considered dashes for the purposes of this function.
- * All other dash-like Unicode characters are not considered a dash here.
- * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
- *
- * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- * @param strict
- *   When TRUE, include all appropriate characters by type as per Unicode.
- *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
- *   When FALSE, zero-width punctuation characters are not considered a character.
- *
- * @return
- *   F_true if a UTF-8 word, dash, or plus character.
- *   F_false if not a UTF-8 word, dash, or plus character.
- *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see isalnum()
- */
-#ifndef _di_f_utf_is_word_dash_plus_
-  extern f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict);
-#endif // _di_f_utf_is_word_dash_plus_
-
-/**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
- *
- * Only characters that do not print, which are generally called zero-width.
- *
- * @param character
- *   The character to validate.
- *   There must be enough space allocated to compare against, as limited by width_max.
- * @param width_max
- *   The maximum width available for checking.
- *   Can be anything greater than 0.
- *
- * @return
- *   F_true if a UTF-8 zero-width character.
- *   F_false if not a UTF-8 zero-width character.
- *
- *   F_maybe (with error bit) if this could be a zero-width character but width is not long enough.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_is_zero_width_
-  extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max);
-#endif // _di_f_utf_is_zero_width_
-
-/**
- * Convert an ASCII or UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character_t type.
- *
- * @param character
- *   The character string to be converted to the f_utf_character_t type.
- *   There must be enough space allocated to convert against, as limited by width_max.
- * @param width_max
- *   The maximum width available for converting.
- *   Can be anything greater than 0.
- * @param character_utf
- *   The generated character of type f_utf_character_t.
- *   This value may be cleared, even on error.
- *
- * @return
- *   F_none if conversion was successful.
- *
- *   F_failure (with error bit) if width is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_char_to_character_
-  extern f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf);
-#endif // _di_f_utf_char_to_character_
-
-/**
- * Convert a given Unicode into a string block representing a single character.
- *
- * @param character
- *   The (UTF-8) character.
- *   The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * @param width_max
- *   The max width available for representing the UTF-8 character.
- *   There must be enough space in the character buffer to handle the Unicode width.
- *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- * @param unicode
- *   A 32-bit integer representing the Unicode (such as U+0001).
- *   Does not need to be interpreted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width_max is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- */
-#ifndef _di_f_utf_unicode_from_
-  extern f_status_t f_utf_unicode_from(const uint32_t unicode, const f_array_length_t width_max, f_string_t *character);
-#endif // _di_f_utf_unicode_from_
-
-/**
- * Convert a given string block representing a single character into Unicode.
- *
- * @param character
- *   The (UTF-8) character to convert to the Unicode representation.
- *   The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
- * @param width_max
- *   The max width available for representing the UTF-8 character.
- *   There must be enough space in the character buffer to handle the Unicode width.
- *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
- * @param unicode
- *   A 32-bit integer representing the Unicode (such as U+0001).
- *   Does not need to be interpreted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see f_utf_character_is_valid()
- */
-#ifndef _di_f_utf_unicode_to_
-  extern f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, uint32_t *unicode);
-#endif // _di_f_utf_unicode_to_
-
-/**
- * Convert a string of the format "U+FFFF" into the codepoint value.
- *
- * This ignores NULL characters.
- * The string may only contain "U+" followed by a hexadecimal digit, upper or lower case.
- * The "U+" prefix is optional.
- * Only ASCII characters are allowed to represent the Unicode sequence string.
- *
- * @param string
- *   The string representing a Unicode sequence.
- * @param length
- *   The maximum number of characters.
- * @param unicode
- *   A 32-bit integer representing the Unicode (such as U+0001).
- *   Does not need to be interpreted like UTF-8, this is a number from 0 onto max supported Unicode integer value (U+10FFFF).
- *
- * @return
- *   F_none on success.
- *
- *   F_failure (with error bit) if width_max is not long enough to convert.
- *   F_parameter (with error bit) if a parameter is invalid.
- *   F_valid_not (with error bit) if string is not a valid Unicode string.
- */
-#ifndef _di_f_utf_unicode_string_to_
-  extern f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, uint32_t *unicode);
-#endif // _di_f_utf_unicode_string_to_
-
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_0/f_utf/c/utf/common.h b/level_0/f_utf/c/utf/common.h

index e3f395d9420f08593f08ae60967cb0a45cd598ac..1d6fb5fdfcde728f484b885dee0e17a9dfe61995 100644 (file)
--- a/level_0/f_utf/c/utf/common.h
+++ b/level_0/f_utf/c/utf/common.h
@@ -36,15 +36,15 @@ extern "C" {
   * The macro_f_utf_byte_width_is is identical to macro_f_utf_byte_width, except it returns 0 when character is ASCII.
   */
  #ifndef _di_f_utf_byte_
-  #define F_utf_byte_1_d 0x80 // 1000 0000
-  #define F_utf_byte_2_d 0xc0 // 1100 0000
-  #define F_utf_byte_3_d 0xe0 // 1110 0000
-  #define F_utf_byte_4_d 0xf0 // 1111 0000
+  #define F_utf_byte_1_d 0x80u // 1000 0000
+  #define F_utf_byte_2_d 0xc0u // 1100 0000
+  #define F_utf_byte_3_d 0xe0u // 1110 0000
+  #define F_utf_byte_4_d 0xf0u // 1111 0000
  
-  #define F_utf_byte_off_1_d 0xc0 // 1100 0000
-  #define F_utf_byte_off_2_d 0xe0 // 1110 0000
-  #define F_utf_byte_off_3_d 0xf0 // 1111 0000
-  #define F_utf_byte_off_4_d 0xf8 // 1111 1000
+  #define F_utf_byte_off_1_d 0xc0u // 1100 0000
+  #define F_utf_byte_off_2_d 0xe0u // 1110 0000
+  #define F_utf_byte_off_3_d 0xf0u // 1111 0000
+  #define F_utf_byte_off_4_d 0xf8u // 1111 1000
  
    #define macro_f_utf_byte_is(character) ((character) & F_utf_byte_1_d)
  
@@ -148,6 +148,17 @@ extern "C" {
  #endif // _di_f_utf_substitute_
  
  /**
+ * Defines type for representing the UTF-8 code as a 32-bit unsigned integer.
+ */
+#ifndef _di_f_utf_t_
+  typedef uint32_t f_utf_t;
+
+  // The default (zero) value for an f_utf_t.
+  #define f_utf_t_initialize 0
+
+  // Expands to the given code; parenthesized so that an expression argument cannot re-associate with surrounding operators.
+  #define macro_f_utf_initialize(code) (code)
+#endif // _di_f_utf_t_
+
+/**
   * Provide a basic UTF-8 character as a single 4-byte variable.
   *
   * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
@@ -176,24 +187,24 @@ extern "C" {
  #ifndef _di_f_utf_character_t_
    typedef uint32_t f_utf_character_t;
  
-  #define F_utf_character_mask_byte_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
-  #define F_utf_character_mask_byte_2_d 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000
-  #define F_utf_character_mask_byte_3_d 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000
-  #define F_utf_character_mask_byte_4_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111
+  #define F_utf_character_mask_byte_1_d 0xff000000u // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+  #define F_utf_character_mask_byte_2_d 0xffff0000u // 1111 1111, 1111 1111, 0000 0000, 0000 0000
+  #define F_utf_character_mask_byte_3_d 0xffffff00u // 1111 1111, 1111 1111, 1111 1111, 0000 0000
+  #define F_utf_character_mask_byte_4_d 0xffffffffu // 1111 1111, 1111 1111, 1111 1111, 1111 1111
  
-  #define F_utf_character_mask_char_1_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
-  #define F_utf_character_mask_char_2_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000
-  #define F_utf_character_mask_char_3_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
-  #define F_utf_character_mask_char_4_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
+  #define F_utf_character_mask_char_1_d 0xff000000u // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+  #define F_utf_character_mask_char_2_d 0x00ff0000u // 0000 0000, 1111 1111, 0000 0000, 0000 0000
+  #define F_utf_character_mask_char_3_d 0x0000ff00u // 0000 0000, 0000 0000, 1111 1111, 0000 0000
+  #define F_utf_character_mask_char_4_d 0x000000ffu // 0000 0000, 0000 0000, 0000 0000, 1111 1111
  
-  #define macro_f_utf_character_t_to_char_1(character) (((character) & F_utf_character_mask_char_1_d) >> 24) // grab first byte.
-  #define macro_f_utf_character_t_to_char_2(character) (((character) & F_utf_character_mask_char_2_d) >> 16) // grab second byte.
-  #define macro_f_utf_character_t_to_char_3(character) (((character) & F_utf_character_mask_char_3_d) >> 8)  // grab third byte.
+  #define macro_f_utf_character_t_to_char_1(character) (((character) & F_utf_character_mask_char_1_d) >> 24u) // grab first byte.
+  #define macro_f_utf_character_t_to_char_2(character) (((character) & F_utf_character_mask_char_2_d) >> 16u) // grab second byte.
+  #define macro_f_utf_character_t_to_char_3(character) (((character) & F_utf_character_mask_char_3_d) >> 8u)  // grab third byte.
    #define macro_f_utf_character_t_to_char_4(character) ((character) & F_utf_character_mask_char_4_d)         // grab fourth byte.
  
-  #define macro_f_utf_character_t_from_char_1(character) (((character) << 24) & F_utf_character_mask_char_1_d) // shift to first byte.
-  #define macro_f_utf_character_t_from_char_2(character) (((character) << 16) & F_utf_character_mask_char_2_d) // shift to second byte.
-  #define macro_f_utf_character_t_from_char_3(character) (((character) << 8) & F_utf_character_mask_char_3_d)  // shift to third byte.
+  #define macro_f_utf_character_t_from_char_1(character) (((character) << 24u) & F_utf_character_mask_char_1_d) // shift to first byte.
+  #define macro_f_utf_character_t_from_char_2(character) (((character) << 16u) & F_utf_character_mask_char_2_d) // shift to second byte.
+  #define macro_f_utf_character_t_from_char_3(character) (((character) << 8u) & F_utf_character_mask_char_3_d)  // shift to third byte.
    #define macro_f_utf_character_t_from_char_4(character) ((character) & F_utf_character_mask_char_4_d)         // shift to fourth byte.
  
    #define macro_f_utf_character_t_width(character)    (macro_f_utf_byte_width(macro_f_utf_character_t_to_char_1(character)))
@@ -201,9 +212,9 @@ extern "C" {
  #endif // _di_f_utf_character_t_
  
  #ifndef _di_f_utf_character_t_codes_
-  #define F_utf_character_t_eol_d         0x0a000000 // 0000 1010, 0000 0000, 0000 0000, 0000 0000
-  #define F_utf_character_t_eos_d         0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000
-  #define F_utf_character_t_placeholder_d 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000
+  #define F_utf_character_t_eol_d         0x0a000000u // 0000 1010, 0000 0000, 0000 0000, 0000 0000
+  #define F_utf_character_t_eos_d         0x00000000u // 0000 0000, 0000 0000, 0000 0000, 0000 0000
+  #define F_utf_character_t_placeholder_d 0x00000000u // 0000 0000, 0000 0000, 0000 0000, 0000 0000
  #endif // _di_f_utf_character_t_codes_
  
  /**
diff --git a/level_0/f_utf/c/utf/convert.c b/level_0/f_utf/c/utf/convert.c

new file mode 100644 (file)

index 0000000..15b6f9a
--- /dev/null
+++ b/level_0/f_utf/c/utf/convert.c
@@ -0,0 +1,375 @@
+#include "../utf.h"
+#include "../private-utf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_char_to_character_
+  f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1 || !character_utf) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Determine the width of the first byte once and reuse it for both checks.
+    const uint8_t width = macro_f_utf_byte_width_is(*character);
+
+    // The character needs more bytes than the caller made available.
+    if (width > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of exactly 1 is reported as an incomplete UTF-8 fragment.
+    if (width == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_char_to_character(character, width_max, character_utf);
+  }
+#endif // _di_f_utf_char_to_character_
+
+#ifndef _di_f_utf_character_to_char_
+  f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      // NOTE(review): utf_character is passed by value, so this rejects the NUL character (0) rather than a NULL pointer; confirm this is intended.
+      if (!utf_character) return F_status_set_error(F_parameter);
+      if (!character) return F_status_set_error(F_parameter);
+      if (!width_max) return F_status_set_error(F_parameter);
+      if (!*width_max) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    const uint8_t width = macro_f_utf_character_t_width_is(utf_character);
+
+    // A width of exactly 1 is treated as an incomplete UTF-8 fragment.
+    if (width == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // A UTF-8 sequence is never longer than 4 bytes.
+    if (width > 4) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 0 means no multi-byte sequence, which still occupies a single byte in the output.
+    const f_array_length_t total = width ? width : 1;
+
+    // Never write past the space the caller reports as available.
+    if (total > *width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // The first UTF-8 byte is held in the most significant byte of utf_character.
+    // Extracting each byte by value (rather than copying raw memory) keeps the output order correct on both big and little endian hosts.
+    (*character)[0] = (f_char_t) macro_f_utf_character_t_to_char_1(utf_character);
+
+    if (total > 1) {
+      (*character)[1] = (f_char_t) macro_f_utf_character_t_to_char_2(utf_character);
+
+      if (total > 2) {
+        (*character)[2] = (f_char_t) macro_f_utf_character_t_to_char_3(utf_character);
+
+        if (total > 3) {
+          (*character)[3] = (f_char_t) macro_f_utf_character_t_to_char_4(utf_character);
+        }
+      }
+    }
+
+    // Report the number of bytes used, as documented for width_max in convert.h.
+    *width_max = total;
+
+    return F_none;
+  }
+#endif // _di_f_utf_character_to_char_
+
+#ifndef _di_f_utf_character_unicode_to_
+  f_status_t f_utf_character_unicode_to(const f_utf_character_t character, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!unicode) {
+        return F_status_set_error(F_parameter);
+      }
+    #endif // _di_level_0_parameter_checking_
+
+    // All of the conversion work is delegated to the private implementation.
+    const f_status_t status = private_f_utf_character_unicode_to(character, unicode);
+
+    return status;
+  }
+#endif // _di_f_utf_character_unicode_to_
+
+#ifndef _di_f_utf_character_unicode_from_
+  f_status_t f_utf_character_unicode_from(const f_utf_t unicode, f_utf_character_t *character) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!character) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Nothing beyond U+10FFFF is a valid Unicode codepoint.
+    if (unicode > 0x10ffff) {
+      return F_status_set_error(F_utf);
+    }
+
+    // The f_utf_character_t holds the first UTF-8 byte in its most significant byte (see macro_f_utf_character_t_to_char_1() and F_utf_character_t_eol_d).
+    // Each sequence is therefore built in the low bytes and then shifted up so that its first byte lands in the most significant byte.
+
+    // U+0000 -> U+007F (0xxxxxxx).
+    if (unicode < 0x80) {
+      *character = unicode << 24;
+    }
+
+    // U+0080 -> U+07FF (110xxxxx 10xxxxxx).
+    else if (unicode < 0x800) {
+      *character = (unicode & 0x7c0) << 2;
+      *character |= unicode & 0x3f;
+      *character |= 0xc080;
+      *character <<= 16;
+    }
+
+    // U+0800 -> U+FFFF (1110xxxx 10xxxxxx 10xxxxxx).
+    else if (unicode < 0x10000) {
+      *character = (unicode & 0xf000) << 4;
+      *character |= (unicode & 0xfc0) << 2;
+      *character |= unicode & 0x3f;
+      *character |= 0xe08080;
+      *character <<= 8;
+    }
+
+    // U+10000 -> U+10FFFF (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx).
+    else {
+      *character = (unicode & 0x1c0000) << 6;
+      *character |= (unicode & 0x3f000) << 4;
+      *character |= (unicode & 0xfc0) << 2;
+      *character |= unicode & 0x3f;
+
+      // The leading byte of a 4-byte sequence is marked with 1111 0000 (0xf0), not 1110 0000 (0xe0).
+      *character |= 0xf0808080;
+    }
+
+    return F_none;
+  }
+#endif // _di_f_utf_character_unicode_from_
+
+#ifndef _di_f_utf_character_unicode_string_to_
+  f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!string) return F_status_set_error(F_parameter);
+      if (!unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_array_length_t i = 0;
+
+    // Skip past any leading NULL characters.
+    while (i < length && !string[i]) {
+      ++i;
+    } // while
+
+    // The first character must be an ASCII "u" or "U".
+    if (i == length || macro_f_utf_character_t_width_is(string[i])) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    if (macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_u_s.string[0] && macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_U_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    do {
+      ++i;
+    } while (i < length && !string[i]);
+
+    // The next non-NULL character must be an ASCII "+".
+    if (i == length || macro_f_utf_character_t_width_is(string[i]) || macro_f_utf_character_t_to_char_1(string[i]) != f_string_ascii_plus_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    // At least one more character must follow the "U+" prefix.
+    if (++i == length) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    f_utf_t value = 0;
+    uint8_t character = 0;
+
+    for (; i < length; ++i) {
+
+      if (!string[i]) continue;
+
+      // Only ASCII characters are allowed to represent the hexadecimal digits.
+      if (macro_f_utf_character_t_width_is(string[i])) {
+        return F_status_set_error(F_valid_not);
+      }
+
+      value *= 16;
+      character = macro_f_utf_character_t_to_char_1(string[i]);
+
+      // Digits "0" to "9".
+      if (character > 0x2f && character < 0x3a) {
+        value += character - 0x30;
+      }
+
+      // Letters "A" to "F".
+      else if (character > 0x40 && character < 0x47) {
+        value += (character - 0x41) + 10;
+      }
+
+      // Letters "a" to "f".
+      else if (character > 0x60 && character < 0x67) {
+        value += (character - 0x61) + 10;
+      }
+      else {
+        return F_status_set_error(F_valid_not);
+      }
+
+      // Reject out of range values as soon as they appear so that a long digit string cannot wrap around the 32-bit value.
+      // This matches the upper limit enforced by f_utf_unicode_string_to().
+      if (value > 0x10ffff) {
+        return F_status_set_error(F_valid_not);
+      }
+    } // for
+
+    *unicode = value;
+
+    return F_none;
+  }
+#endif // _di_f_utf_character_unicode_string_to_
+
+#ifndef _di_f_utf_unicode_from_
+  f_status_t f_utf_unicode_from(const f_utf_t unicode, const f_array_length_t width_max, f_string_t *character) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+
+      // The codepoint is passed by value (and U+0000 is a valid codepoint), it is the destination pointer that must be checked.
+      if (!character) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (unicode > 0x10ffff) {
+      return F_status_set_error(F_utf);
+    }
+
+    // Each byte is stored individually, so the result does not depend on the endianness of the host.
+    // NOTE(review): convert.h documents F_failure for a width_max that is too small, but F_utf is what is returned below; confirm which is intended.
+    f_array_length_t used = 0;
+
+    // U+0000 -> U+007F (0xxxxxxx).
+    if (unicode < 0x80) {
+      (*character)[0] = (uint8_t) unicode;
+      used = 1;
+    }
+
+    // U+0080 -> U+07FF (110xxxxx 10xxxxxx).
+    else if (unicode < 0x800) {
+      if (width_max < 2) {
+        return F_status_set_error(F_utf);
+      }
+
+      (*character)[0] = F_utf_byte_2_d | ((uint8_t) ((unicode & 0x7c0) >> 6));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 2;
+    }
+
+    // U+0800 -> U+FFFF (1110xxxx 10xxxxxx 10xxxxxx).
+    else if (unicode < 0x10000) {
+      if (width_max < 3) {
+        return F_status_set_error(F_utf);
+      }
+
+      (*character)[0] = F_utf_byte_3_d | ((uint8_t) ((unicode & 0xf000) >> 12));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
+      (*character)[2] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 3;
+    }
+
+    // U+10000 -> U+10FFFF (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx).
+    else {
+      if (width_max < 4) {
+        return F_status_set_error(F_utf);
+      }
+
+      (*character)[0] = F_utf_byte_4_d | ((uint8_t) ((unicode & 0x1c0000) >> 18));
+      (*character)[1] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0x3f000) >> 12));
+      (*character)[2] = F_utf_byte_1_d | ((uint8_t) ((unicode & 0xfc0) >> 6));
+      (*character)[3] = F_utf_byte_1_d | ((uint8_t) (unicode & 0x3f));
+      used = 4;
+    }
+
+    // Clear whatever remains of the (at most 4 byte) character block that the caller made available.
+    for (; used < width_max && used < 4; ++used) {
+      (*character)[used] = 0;
+    } // for
+
+    return F_none;
+  }
+#endif // _di_f_utf_unicode_from_
+
+#ifndef _di_f_utf_unicode_to_
+  f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1 || !unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // First pack the string bytes into a single f_utf_character_t.
+    f_utf_character_t sequence = 0;
+
+    const f_status_t status = private_f_utf_char_to_character(character, width_max, &sequence);
+
+    if (F_status_is_error(status)) {
+      return status;
+    }
+
+    // Then convert the packed sequence into the Unicode codepoint.
+    return private_f_utf_character_unicode_to(sequence, unicode);
+  }
+#endif // _di_f_utf_unicode_to_
+
+#ifndef _di_f_utf_unicode_string_to_
+  f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, f_utf_t *unicode) {
+    #ifndef _di_level_0_parameter_checking_
+      if (!unicode) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_array_length_t i = 0;
+
+    // Skip past any leading NULL characters.
+    while (i < length && !string[i]) {
+      ++i;
+    } // while
+
+    // The first character must be a "u" or "U".
+    if (i == length || (string[i] != f_string_ascii_u_s.string[0] && string[i] != f_string_ascii_U_s.string[0])) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    do {
+      ++i;
+    } while (i < length && !string[i]);
+
+    // The next non-NULL character must be a "+".
+    if (i == length || string[i] != f_string_ascii_plus_s.string[0]) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    // At least one more character must follow the "U+" prefix.
+    if (++i == length) {
+      return F_status_set_error(F_valid_not);
+    }
+
+    f_utf_t value = 0;
+
+    for (; i < length; ++i) {
+
+      if (!string[i]) continue;
+
+      value *= 16;
+
+      // Digits "0" to "9".
+      if (string[i] > 0x2f && string[i] < 0x3a) {
+        value += string[i] - 0x30;
+      }
+
+      // Letters "A" to "F".
+      else if (string[i] > 0x40 && string[i] < 0x47) {
+        value += (string[i] - 0x41) + 10;
+      }
+
+      // Letters "a" to "f".
+      else if (string[i] > 0x60 && string[i] < 0x67) {
+        value += (string[i] - 0x61) + 10;
+      }
+      else {
+        return F_status_set_error(F_valid_not);
+      }
+
+      // Check the range after every digit so that a long digit string cannot wrap around the 32-bit value and be accepted.
+      if (value > 0x10ffff) {
+        return F_status_set_error(F_valid_not);
+      }
+    } // for
+
+    *unicode = value;
+
+    return F_none;
+  }
+#endif // _di_f_utf_unicode_string_to_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/c/utf/convert.h b/level_0/f_utf/c/utf/convert.h

new file mode 100644 (file)

index 0000000..007da72
--- /dev/null
+++ b/level_0/f_utf/c/utf/convert.h
@@ -0,0 +1,231 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "convert" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_convert_h
+#define _F_utf_convert_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Convert a specialized f_utf_character_t type to a uint8_t, stored as a string (character buffer).
+ *
+ * This will also convert an ASCII character stored in utf_character.
+ * This will not resize character.
+ *
+ * @param utf_character
+ *   The UTF-8 character to convert from.
+ * @param character
+ *   A uint8_t representation of the UTF-8 character, stored as a string of width bytes.
+ *   If width_max is 0, then this should be set to 0.
+ * @param width_max
+ *   This is set to the max number of bytes available.
+ *   This is then updated to represent the max bytes used if enough space is available.
+ *
+ * @return
+ *   F_none if conversion was successful.
+ *
+ *   F_failure (with error bit) if width is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid (this includes a utf_character of 0 and a width_max of 0).
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_to_char_
+  extern f_status_t f_utf_character_to_char(const f_utf_character_t utf_character, f_string_t *character, f_array_length_t *width_max);
+#endif // _di_f_utf_character_to_char_
+
+/**
+ * Convert a given (UTF-8) character into Unicode.
+ *
+ * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
+ * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
+ * The Unicode does not need to be interpreted like UTF-8, it is simply a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @param character
+ *   The (UTF-8) character.
+ * @param unicode
+ *   The Unicode number.
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_unicode_to_
+  extern f_status_t f_utf_character_unicode_to(const f_utf_character_t character, f_utf_t *unicode);
+#endif // _di_f_utf_character_unicode_to_
+
+/**
+ * Convert a given Unicode into (UTF-8) character.
+ *
+ * The f_utf_character_t is a 32-bit integer containing UTF-8 sequences, unchanged.
+ * The Unicode is a 32-bit integer representing the Unicode (such as U+0001).
+ * The Unicode does not need to be interpreted like UTF-8, it is simply a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @param unicode
+ *   The Unicode number.
+ *   Anything larger than U+10FFFF is rejected.
+ * @param character
+ *   The (UTF-8) character.
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_unicode_from_
+  extern f_status_t f_utf_character_unicode_from(const f_utf_t unicode, f_utf_character_t *character);
+#endif // _di_f_utf_character_unicode_from_
+
+/**
+ * Convert a string of the format "U+FFFF" into the codepoint value.
+ *
+ * This ignores NULL characters.
+ * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
+ * The "U+" prefix is required by the current implementation (either "U" or "u" is accepted).
+ * Only ASCII characters are allowed to represent the Unicode sequence string.
+ *
+ * @param string
+ *   The string representing a Unicode sequence.
+ * @param length
+ *   The maximum number of characters.
+ * @param unicode
+ *   A 32-bit integer representing the Unicode (such as U+0001).
+ *   Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_valid_not (with error bit) if string is not a valid Unicode string.
+ */
+#ifndef _di_f_utf_character_unicode_string_to_
+  extern f_status_t f_utf_character_unicode_string_to(const f_utf_string_t string, const f_array_length_t length, f_utf_t *unicode);
+#endif // _di_f_utf_character_unicode_string_to_
+
+/**
+ * Convert an ASCII or UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character_t type.
+ *
+ * @param character
+ *   The character string to be converted to the f_utf_character_t type.
+ *   There must be enough space allocated to convert against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for converting.
+ *   Can be anything greater than 0.
+ * @param character_utf
+ *   The generated character of type f_utf_character_t.
+ *   This value may be cleared, even on error.
+ *
+ * @return
+ *   F_none if conversion was successful.
+ *
+ *   F_failure (with error bit) if width_max is smaller than the width of the character.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_char_to_character_
+  extern f_status_t f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_character_t *character_utf);
+#endif // _di_f_utf_char_to_character_
+
+/**
+ * Convert a given Unicode into a string block representing a single character.
+ *
+ * @param unicode
+ *   A 32-bit integer representing the Unicode (such as U+0001).
+ *   Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ * @param width_max
+ *   The max width available for representing the UTF-8 character.
+ *   There must be enough space in the character buffer to handle the Unicode width.
+ *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ * @param character
+ *   The string block that the UTF-8 bytes of the character are written to.
+ *   This is a pointer to the string and the bytes are written starting at the first position of that string.
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_unicode_from_
+  extern f_status_t f_utf_unicode_from(const f_utf_t unicode, const f_array_length_t width_max, f_string_t *character);
+#endif // _di_f_utf_unicode_from_
+
+/**
+ * Convert a given string block representing a single character into Unicode.
+ *
+ * @param character
+ *   The string block holding the (UTF-8) character to convert to the Unicode representation.
+ * @param width_max
+ *   The max width available for representing the UTF-8 character.
+ *   There must be enough space in the character buffer to handle the Unicode width.
+ *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ * @param unicode
+ *   A 32-bit integer representing the Unicode (such as U+0001).
+ *   Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_failure (with error bit) if width is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_unicode_to_
+  extern f_status_t f_utf_unicode_to(const f_string_t character, const f_array_length_t width_max, f_utf_t *unicode);
+#endif // _di_f_utf_unicode_to_
+
+/**
+ * Convert a string of the format "U+FFFF" into the codepoint value.
+ *
+ * This ignores NULL characters.
+ * The string may only contain "U+" followed by hexadecimal digits, upper or lower case.
+ * The "U+" prefix is required by the current implementation (either "U" or "u" is accepted).
+ * Only ASCII characters are allowed to represent the Unicode sequence string.
+ *
+ * @param string
+ *   The string representing a Unicode sequence.
+ * @param length
+ *   The maximum number of characters.
+ * @param unicode
+ *   A 32-bit integer representing the Unicode (such as U+0001).
+ *   Does not need to be interpreted like UTF-8, this is a number from 0 up to the max supported Unicode integer value (U+10FFFF).
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_valid_not (with error bit) if string is not a valid Unicode string (this includes a value larger than U+10FFFF).
+ */
+#ifndef _di_f_utf_unicode_string_to_
+  extern f_status_t f_utf_unicode_string_to(const f_string_t string, const f_array_length_t length, f_utf_t *unicode);
+#endif // _di_f_utf_unicode_string_to_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_convert_h
diff --git a/level_0/f_utf/c/utf/dynamic.h b/level_0/f_utf/c/utf/dynamic.h

index 13aa609b4649a53d4d6b2c697fe6f5a075dcc5db..255f2c81be674016a76f118e6028098ad72c6016 100644 (file)
--- a/level_0/f_utf/c/utf/dynamic.h
+++ b/level_0/f_utf/c/utf/dynamic.h
@@ -151,24 +151,6 @@ extern "C" {
  #endif // _di_f_utf_string_dynamic_adjust_
  
  /**
- * Resize the dynamic string.
- *
- * @param length
- *   The new size to use.
- * @param dynamic
- *   The string to resize.
- *
- * @return
- *   F_none on success.
- *
- *   F_memory_not (with error bit) on out of memory.
- *   F_parameter (with error bit) if a parameter is invalid.
- */
-#ifndef _di_f_utf_string_dynamic_adjust_
-  extern f_status_t f_utf_string_dynamic_adjust(const f_array_length_t length, f_utf_string_dynamic_t *dynamic);
-#endif // _di_f_utf_string_dynamic_adjust_
-
-/**
   * Append the source string onto the destination.
   *
   * @param source
diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c

new file mode 100644 (file)

index 0000000..114dba0
--- /dev/null
+++ b/level_0/f_utf/c/utf/is.c
@@ -0,0 +1,963 @@
+#include "../utf.h"
+#include "../private-utf.h"
+#include "private-is_unassigned.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_is_
+  f_status_t f_utf_is(const f_string_t character) {
+
+    // Only the first byte is inspected: a non-zero byte width selects the UTF-8 (non-ASCII) case.
+    // The width itself may be greater than 1, so normalize it to F_true/F_false rather than
+    // returning the raw width as if it were a status code.
+    if (macro_f_utf_byte_width_is(*character)) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_
+
+#ifndef _di_f_utf_is_alpha_
+  f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A zero byte width selects the single-byte path, which is delegated to isalpha().
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isalpha(character[0]) ? F_true : F_false;
+    }
+
+    // The sequence must fit inside the bytes the caller made available.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_alpha(utf);
+  }
+#endif // _di_f_utf_is_alpha_
+
+#ifndef _di_f_utf_is_alpha_digit_
+  f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isalnum() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isalnum(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_alpha_digit(utf);
+  }
+#endif // _di_f_utf_is_alpha_digit_
+
+#ifndef _di_f_utf_is_alpha_numeric_
+  f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isalnum() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isalnum(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_alpha_numeric(utf);
+  }
+#endif // _di_f_utf_is_alpha_numeric_
+
+#ifndef _di_f_utf_is_ascii_
+  f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // A zero byte width is the ASCII case.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_true;
+    }
+
+    // Errors are still reported for sequences that do not fit or are fragments.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_ascii_
+
+#ifndef _di_f_utf_is_combining_
+  f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII combining characters.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_combining(utf);
+  }
+#endif // _di_f_utf_is_combining_
+
+#ifndef _di_f_utf_is_control_
+  f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+
+      // The sequence must fit inside the bytes the caller made available.
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      // A width of 1 is reported as an incomplete UTF-8 fragment.
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_control(character_utf);
+    }
+
+    // iscntrl() only promises a non-zero value for a match, so map it onto F_true/F_false
+    // instead of returning the raw integer as a status code.
+    if (iscntrl(*character)) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_control_
+
+#ifndef _di_f_utf_is_control_code_
+  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+
+      // The sequence must fit inside the bytes the caller made available.
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      // A width of 1 is reported as an incomplete UTF-8 fragment.
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_control_code(character_utf);
+    }
+
+    // Single-byte path: iscntrl() decides.
+    if (iscntrl(*character)) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_control_code_
+
+#ifndef _di_f_utf_is_control_format_
+  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII control formats.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_control_format(utf);
+  }
+#endif // _di_f_utf_is_control_format_
+
+#ifndef _di_f_utf_is_control_picture_
+  f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII control pictures.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Any width other than 3 is rejected before decoding.
+    if (macro_f_utf_byte_width_is(character[0]) != 3) {
+      return F_false;
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_control_picture(utf);
+  }
+#endif // _di_f_utf_is_control_picture_
+
+#ifndef _di_f_utf_is_digit_
+  f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isdigit() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isdigit(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_digit(utf);
+  }
+#endif // _di_f_utf_is_digit_
+
+#ifndef _di_f_utf_is_emoji_
+  f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isdigit() decides.
+    // NOTE(review): ASCII digits are reported as emoji here; confirm this is intended rather than carried over from f_utf_is_digit().
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isdigit(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_emoji(utf);
+  }
+#endif // _di_f_utf_is_emoji_
+
+#ifndef _di_f_utf_is_fragment_
+  f_status_t f_utf_is_fragment(const f_string_t character) {
+
+    // Only a byte width of exactly 1 counts as a fragment.
+    return (macro_f_utf_byte_width_is(character[0]) == 1) ? F_true : F_false;
+  }
+#endif // _di_f_utf_is_fragment_
+
+#ifndef _di_f_utf_is_graph_
+  f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isgraph() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isgraph(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    // Control, white space, and zero-width characters are all treated as non-graph (checked in that order).
+    if (private_f_utf_character_is_control(utf) || private_f_utf_character_is_whitespace(utf) || private_f_utf_character_is_zero_width(utf)) {
+      return F_false;
+    }
+
+    return F_true;
+  }
+#endif // _di_f_utf_is_graph_
+
+#ifndef _di_f_utf_is_numeric_
+  f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isdigit() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isdigit(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_numeric(utf);
+  }
+#endif // _di_f_utf_is_numeric_
+
+#ifndef _di_f_utf_is_phonetic_
+  f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII phonetic characters.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_phonetic(utf);
+  }
+#endif // _di_f_utf_is_phonetic_
+
+#ifndef _di_f_utf_is_private_
+  f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII private characters.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_private(utf);
+  }
+#endif // _di_f_utf_is_private_
+
+#ifndef _di_f_utf_is_punctuation_
+  f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+
+      // The sequence must fit inside the bytes the caller made available.
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      // A width of 1 is reported as an incomplete UTF-8 fragment.
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_punctuation(character_utf);
+    }
+
+    // The ranges below use exclusive bounds on both ends.
+
+    // ASCII: '!' to '#'.
+    if (character[0] > 0x20 && character[0] < 0x24) {
+      return F_true;
+    }
+
+    // ASCII: '%' to '*'.
+    if (character[0] > 0x24 && character[0] < 0x2b) {
+      return F_true;
+    }
+
+    // ASCII: ',' to '/'.
+    if (character[0] > 0x2b && character[0] < 0x30) {
+      return F_true;
+    }
+
+    // ASCII: ':', ';', '?', or '@'.
+    if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
+      return F_true;
+    }
+
+    // ASCII: '[' (0x5b) to ']' (0x5d); the upper bound is 0x5e so that ']' itself is included.
+    if (character[0] > 0x5a && character[0] < 0x5e) {
+      return F_true;
+    }
+
+    // ASCII: '_', '{', or '}'.
+    if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_punctuation_
+
+#ifndef _di_f_utf_is_symbol_
+  f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+
+      // The sequence must fit inside the bytes the caller made available.
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      // A width of 1 is reported as an incomplete UTF-8 fragment.
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_symbol(character_utf);
+    }
+
+    // ASCII: '$' or '+'.
+    if (character[0] == 0x24 || character[0] == 0x2b) {
+      return F_true;
+    }
+
+    // ASCII: '<' (0x3c) to '>' (0x3e); the bounds are exclusive, so 0x3b and 0x3f are used to include both ends.
+    if (character[0] > 0x3b && character[0] < 0x3f) {
+      return F_true;
+    }
+
+    // ASCII: '^', '`', '|', or '~'.
+    if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_is_symbol_
+
+#ifndef _di_f_utf_is_surrogate_
+  f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // ASCII are never surrogate.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_surrogate(utf);
+  }
+#endif // _di_f_utf_is_surrogate_
+
+#ifndef _di_f_utf_is_unassigned_
+  f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // ASCII are never unassigned.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_unassigned(utf);
+  }
+#endif // _di_f_utf_is_unassigned_
+
+#ifndef _di_f_utf_is_valid_
+  f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // ASCII are valid.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_true;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_valid(utf);
+  }
+#endif // _di_f_utf_is_valid_
+
+#ifndef _di_f_utf_is_whitespace_
+  f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: isspace() decides.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return isspace(character[0]) ? F_true : F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_whitespace(utf);
+  }
+#endif // _di_f_utf_is_whitespace_
+
+#ifndef _di_f_utf_is_whitespace_modifier_
+  f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII whitespace modifiers.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_whitespace_modifier(utf);
+  }
+#endif // _di_f_utf_is_whitespace_modifier_
+
+#ifndef _di_f_utf_is_whitespace_other_
+  f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // There are no ASCII whitespace other.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_whitespace_other(utf);
+  }
+#endif // _di_f_utf_is_whitespace_other_
+
+#ifndef _di_f_utf_is_wide_
+  f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
+    // Reject a zero width_max up front, matching every other width_max based check in this file.
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*character)) {
+
+      // The sequence must fit inside the bytes the caller made available.
+      if (macro_f_utf_byte_width_is(*character) > width_max) {
+        return F_status_set_error(F_failure);
+      }
+
+      // A width of 1 is reported as an incomplete UTF-8 fragment.
+      if (macro_f_utf_byte_width_is(*character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_character_t character_utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_wide(character_utf);
+    }
+
+    // There are no wide ASCII characters.
+    return F_false;
+  }
+#endif // _di_f_utf_is_wide_
+
+#ifndef _di_f_utf_is_word_
+  f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: alphanumerics and the underscore are word characters.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      if (isalnum(character[0])) return F_true;
+      if (character[0] == f_string_ascii_underscore_s.string[0]) return F_true;
+
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_word(utf, strict);
+  }
+#endif // _di_f_utf_is_word_
+
+#ifndef _di_f_utf_is_word_dash_
+  f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: alphanumerics, the underscore, and the minus are accepted.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      if (isalnum(character[0])) return F_true;
+      if (character[0] == f_string_ascii_underscore_s.string[0]) return F_true;
+      if (character[0] == f_string_ascii_minus_s.string[0]) return F_true;
+
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_word_dash(utf, strict);
+  }
+#endif // _di_f_utf_is_word_dash_
+
+#ifndef _di_f_utf_is_word_dash_plus_
+  f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    // Single-byte path: alphanumerics, the underscore, the minus, and the plus are accepted.
+    if (!macro_f_utf_byte_width_is(character[0])) {
+      if (isalnum(character[0])) return F_true;
+      if (character[0] == f_string_ascii_underscore_s.string[0]) return F_true;
+      if (character[0] == f_string_ascii_minus_s.string[0]) return F_true;
+      if (character[0] == f_string_ascii_plus_s.string[0]) return F_true;
+
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_word_dash_plus(utf, strict);
+  }
+#endif // _di_f_utf_is_word_dash_plus_
+
+#ifndef _di_f_utf_is_zero_width_
+  f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (!macro_f_utf_byte_width_is(character[0])) {
+
+      // These control characters are considered zero-width spaces: 0x00 to 0x08, 0x0c to 0x1f, and 0x7f.
+      if ((character[0] >= 0x00 && character[0] <= 0x08) || (character[0] >= 0x0c && character[0] <= 0x1f) || character[0] == 0x7f) {
+        return F_true;
+      }
+
+      return F_false;
+    }
+
+    // Refuse sequences wider than the available bytes.
+    if (macro_f_utf_byte_width_is(character[0]) > width_max) {
+      return F_status_set_error(F_failure);
+    }
+
+    // A width of 1 is reported as an incomplete UTF-8 fragment.
+    if (macro_f_utf_byte_width_is(character[0]) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    f_utf_character_t utf = 0;
+    const f_status_t converted = private_f_utf_char_to_character(character, width_max, &utf);
+
+    if (F_status_is_error(converted)) return converted;
+
+    return private_f_utf_character_is_zero_width(utf);
+  }
+#endif // _di_f_utf_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h

new file mode 100644 (file)

index 0000000..099bfe3
--- /dev/null
+++ b/level_0/f_utf/c/utf/is.h
@@ -0,0 +1,777 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "is" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_is_h
+#define _F_utf_is_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
+ *
+ * This does not check the validity of the character, for that instead use f_utf_is_valid().
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against.
+ *
+ * @return
+ *   F_true if a UTF-8 character.
+ *   F_false if not a UTF-8 character.
+ */
+#ifndef _di_f_utf_is_
+  extern f_status_t f_utf_is(const f_string_t character);
+#endif // _di_f_utf_is_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 alphabet character.
+ *   F_false if not a UTF-8 alphabet character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalpha()
+ */
+#ifndef _di_f_utf_is_alpha_
+  extern f_status_t f_utf_is_alpha(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 alpha-digit character.
+ *   F_false if not a UTF-8 alpha-digit character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_alpha_digit_
+  extern f_status_t f_utf_is_alpha_digit(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 alpha-numeric character.
+ *   F_false if not a UTF-8 alpha-numeric character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_alpha_numeric_
+  extern f_status_t f_utf_is_alpha_numeric(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_alpha_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if an ASCII character.
+ *   F_false if not an ASCII character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_ascii_
+  extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_ascii_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 combining character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 combining character.
+ *   F_false if not a UTF-8 combining character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_combining_
+  extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_combining_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
+ *
+ * This includes control code and control format characters.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 control character.
+ *   F_false if not a UTF-8 control character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_is_control_
+  extern f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 control code character.
+ *   F_false if not a UTF-8 control code character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_code_
+  extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 control format character.
+ *   F_false if not a UTF-8 control format character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_format_
+  extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_format_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
+ *
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 control picture character.
+ *   F_false if not a UTF-8 control picture character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_control_picture_
+  extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_picture_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 digit character.
+ *   F_false if not a UTF-8 digit character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_digit_
+  extern f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 emoji character.
+ *   F_false if not a UTF-8 emoji character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_emoji_
+  extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_emoji_
+
+/**
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
+ *
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
+ *
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
+ *
+ * According to rfc3629, the valid octet sequences for UTF-8 are:
+ *   UTF8-octets = *( UTF8-char )
+ *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+ *   UTF8-1      = %x00-7F
+ *   UTF8-2      = %xC2-DF UTF8-tail
+ *   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+ *                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+ *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+ *                 %xF4 %x80-8F 2( UTF8-tail )
+ *   UTF8-tail   = %x80-BF
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against.
+ *
+ * @return
+ *   F_true if a 1-width UTF-8 character fragment.
+ *   F_false if not a 1-width UTF-8 character fragment.
+ */
+#ifndef _di_f_utf_is_fragment_
+  extern f_status_t f_utf_is_fragment(const f_string_t character);
+#endif // _di_f_utf_is_fragment_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 graph.
+ *   F_false if not a UTF-8 graph.
+ *
+ *   F_maybe (with error bit) if this could be a graph but width is not long enough.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isgraph()
+ */
+#ifndef _di_f_utf_is_graph_
+  extern f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_graph_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 numeric character.
+ *   F_false if not a UTF-8 numeric character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_numeric_
+  extern f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 phonetic character.
+ *   F_false if not a UTF-8 phonetic character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_phonetic_
+  extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_phonetic_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 private character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 private character.
+ *   F_false if not a UTF-8 private character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_private_
+  extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_private_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 punctuation character.
+ *   F_false if not a UTF-8 punctuation character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_punctuation_
+  extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_punctuation_
+
+/**
+ * Check to see if the entire byte block of the character is a surrogate UTF-8 character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 surrogate character.
+ *   F_false if not a UTF-8 surrogate character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_surrogate_
+  extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_surrogate_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 symbol character.
+ *   F_false if not a UTF-8 symbol character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_symbol_
+  extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_symbol_
+
+/**
+ * Check to see if the entire byte block of the character is an unassigned UTF-8 character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if an unassigned UTF-8 character.
+ *   F_false if not an unassigned UTF-8 character.
+ *
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_unassigned_
+  extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_unassigned_
+
+/**
+ * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
+ *
+ * This does validate if the UTF-8 character is a valid UTF-8 character.
+ * To not do this, use f_utf_is().
+ *
+ * Valid ASCII character codes are considered valid by this function.
+ *
+ * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a valid UTF-8 character or is an ASCII character.
+ *   F_false if not a valid UTF-8 character.
+ *
+ *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_valid_
+  extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_valid_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
+ *
+ * Non-printing or zero-width characters are not considered whitespace.
+ * This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ * This does not include non-true whitespace characters, such as Ogham Space Mark ( ).
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not white space.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 whitespace.
+ *   F_false if not a UTF-8 whitespace.
+ *
+ *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_is_whitespace_
+  extern f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 whitespace modifier character.
+ *
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 whitespace modifier.
+ *   F_false if not a UTF-8 whitespace modifier.
+ *
+ *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_whitespace_modifier_
+  extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_modifier_
+
+/**
+ * Check to see if the entire byte block of the character is an other type of UTF-8 space character.
+ *
+ * This is a list of whitespace that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark ( ).
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if an other type of UTF-8 whitespace.
+ *   F_false if not an other type of UTF-8 whitespace.
+ *
+ *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_whitespace_other_
+  extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_whitespace_other_
+
+/**
+ * Get whether or not the UTF-8 character is a wide character on display.
+ *
+ * This is not the wide as in width in bytes that the codepoint takes up in UTF-8.
+ * Instead, this is the width in characters on the screen the character takes up.
+ * "Wide" characters take up 2 characters on render.
+ * "Narrow" characters take up 1 character on render.
+ *
+ * @param character
+ *   The (UTF-8) character.
+ * @param width_max
+ *   The max width available for representing the UTF-8 character.
+ *   There must be enough space in the character buffer to handle the Unicode width.
+ *   It is recommended to always have 4 characters (4 uint8_t) of space available in character.
+ *   This is the width in bytes the codepoint takes up in UTF-8.
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_wide_
+  extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_wide_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-digit or an underscore '_'.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word character.
+ *   F_false if not a UTF-8 word character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_
+  extern f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word or dash character.
+ *   F_false if not a UTF-8 word or dash character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_dash_
+  extern f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_dash_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
+ *
+ * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word, dash, or plus character.
+ *   F_false if not a UTF-8 word, dash, or plus character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_is_word_dash_plus_
+  extern f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict);
+#endif // _di_f_utf_is_word_dash_plus_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
+ *
+ * Only characters that do not print, which are generally called zero-width.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   F_true if a UTF-8 zero-width character.
+ *   F_false if not a UTF-8 zero-width character.
+ *
+ *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_is_zero_width_
+  extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_is_h
diff --git a/level_0/f_utf/c/utf/is_character.c b/level_0/f_utf/c/utf/is_character.c

new file mode 100644 (file)

index 0000000..d7f9bc1
--- /dev/null
+++ b/level_0/f_utf/c/utf/is_character.c
@@ -0,0 +1,567 @@
+#include "../utf.h"
+#include "../private-utf.h"
+#include "private-is_unassigned.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_f_utf_character_is_
+  f_status_t f_utf_character_is(const f_utf_character_t character) {
+
+    // A zero width check means the value is not handled as a multi-byte UTF-8 character.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is reported as a fragment (without the error bit), anything else as a UTF-8 character.
+    return (macro_f_utf_character_t_width_is(character) == 1) ? F_utf_fragment : F_true;
+  }
+#endif // _di_f_utf_character_is_
+
+#ifndef _di_f_utf_character_is_alpha_
+  f_status_t f_utf_character_is_alpha(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isalpha() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalpha(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_alpha(character);
+  }
+#endif // _di_f_utf_character_is_alpha_
+
+#ifndef _di_f_utf_character_is_alpha_digit_
+  f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isalnum() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalnum(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_alpha_digit(character);
+  }
+#endif // _di_f_utf_character_is_alpha_digit_
+
+#ifndef _di_f_utf_character_is_alpha_numeric_
+  f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isalnum() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isalnum(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_alpha_numeric(character);
+  }
+#endif // _di_f_utf_character_is_alpha_numeric_
+
+#ifndef _di_f_utf_character_is_ascii_
+  f_status_t f_utf_character_is_ascii(const f_utf_character_t character) {
+
+    // Any non-zero width check rules out ASCII; no further validation is performed.
+    return macro_f_utf_character_t_width_is(character) ? F_false : F_true;
+  }
+#endif // _di_f_utf_character_is_ascii_
+
+#ifndef _di_f_utf_character_is_combining_
+  f_status_t f_utf_character_is_combining(const f_utf_character_t character) {
+
+    // There are no combining characters in ASCII.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_combining(character);
+  }
+#endif // _di_f_utf_character_is_combining_
+
+#ifndef _di_f_utf_character_is_control_
+  f_status_t f_utf_character_is_control(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so iscntrl() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return iscntrl(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_control(character);
+  }
+#endif // _di_f_utf_character_is_control_
+
+#ifndef _di_f_utf_character_is_control_code_
+  f_status_t f_utf_character_is_control_code(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so iscntrl() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return iscntrl(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_control_code(character);
+  }
+#endif // _di_f_utf_character_is_control_code_
+
+#ifndef _di_f_utf_character_is_control_format_
+  f_status_t f_utf_character_is_control_format(const f_utf_character_t character) {
+
+    // The guard and the function name match the declaration in is_character.h, so the declared symbol is the one defined here.
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_control_format(character);
+    }
+
+    // There are no control format characters in ASCII.
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_control_format_
+
+#ifndef _di_f_utf_character_is_control_picture_
+  f_status_t f_utf_character_is_control_picture(const f_utf_character_t character) {
+
+    // There are no control picture characters in ASCII.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_control_picture(character);
+  }
+#endif // _di_f_utf_character_is_control_picture_
+
+#ifndef _di_f_utf_character_is_digit_
+  f_status_t f_utf_character_is_digit(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isdigit() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_digit(character);
+  }
+#endif // _di_f_utf_character_is_digit_
+
+#ifndef _di_f_utf_character_is_emoji_
+  f_status_t f_utf_character_is_emoji(const f_utf_character_t character) {
+
+    // ASCII: only decimal digits (as determined by isdigit()) are reported as emoji.
+    // NOTE(review): the Unicode Emoji property also lists '#' and '*'; confirm whether digits-only is intended here.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_emoji(character);
+  }
+#endif // _di_f_utf_character_is_emoji_
+
+#ifndef _di_f_utf_character_is_fragment_
+  f_status_t f_utf_character_is_fragment(const f_utf_character_t character) {
+
+    // The comparison result is returned directly: 1 when the width is exactly 1, otherwise 0.
+    // NOTE(review): presumably 1 and 0 correspond to F_true and F_false; confirm against the status definitions.
+    return macro_f_utf_character_t_width_is(character) == 1;
+  }
+#endif // _di_f_utf_character_is_fragment_
+
+#ifndef _di_f_utf_character_is_graph_
+  f_status_t f_utf_character_is_graph(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isgraph() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isgraph(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    // Any non-zero result from the control, whitespace, or zero-width checks (tested in that order) rules the character out.
+    if (private_f_utf_character_is_control(character) || private_f_utf_character_is_whitespace(character) || private_f_utf_character_is_zero_width(character)) {
+      return F_false;
+    }
+
+    return F_true;
+  }
+#endif // _di_f_utf_character_is_graph_
+
+#ifndef _di_f_utf_character_is_numeric_
+  f_status_t f_utf_character_is_numeric(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isdigit() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isdigit(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_numeric(character);
+  }
+#endif // _di_f_utf_character_is_numeric_
+
+#ifndef _di_f_utf_character_is_phonetic_
+  f_status_t f_utf_character_is_phonetic(const f_utf_character_t character) {
+
+    // There are no ASCII phonetic characters.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_phonetic(character);
+  }
+#endif // _di_f_utf_character_is_phonetic_
+
+#ifndef _di_f_utf_character_is_private_
+  f_status_t f_utf_character_is_private(const f_utf_character_t character) {
+
+    // There are no ASCII private characters.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_private(character);
+  }
+#endif // _di_f_utf_character_is_private_
+
+#ifndef _di_f_utf_character_is_punctuation_
+  f_status_t f_utf_character_is_punctuation(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_punctuation(character);
+    }
+
+    // The ASCII code is compared in the highest byte, and each range uses exclusive bounds one code outside the range.
+
+    // ASCII: '!' to '#'.
+    if (character > 0x20000000 && character < 0x24000000) {
+      return F_true;
+    }
+
+    // ASCII: '%' to '*'.
+    if (character > 0x24000000 && character < 0x2b000000) {
+      return F_true;
+    }
+
+    // ASCII: ',' to '/'.
+    if (character > 0x2b000000 && character < 0x30000000) {
+      return F_true;
+    }
+
+    // ASCII: ':', ';', '?', or '@'.
+    if (character == 0x3a000000 || character == 0x3b000000 || character == 0x3f000000 || character == 0x40000000) {
+      return F_true;
+    }
+
+    // ASCII: '[' to ']' (the upper bound is '^' so that ']' itself is included).
+    if (character > 0x5a000000 && character < 0x5e000000) {
+      return F_true;
+    }
+
+    // ASCII: '_', '{', or '}'.
+    if (character == 0x5f000000 || character == 0x7b000000 || character == 0x7d000000) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_punctuation_
+
+#ifndef _di_f_utf_character_is_symbol_
+  f_status_t f_utf_character_is_symbol(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_symbol(character);
+    }
+
+    // The ASCII code is compared in the highest byte.
+
+    // ASCII: '$' or '+'.
+    if (character == 0x24000000 || character == 0x2b000000) {
+      return F_true;
+    }
+
+    // ASCII: '<' to '>' (exclusive bounds are ';' and '?' so that '<', '=', and '>' are all included).
+    if (character > 0x3b000000 && character < 0x3f000000) {
+      return F_true;
+    }
+
+    // ASCII: '^', '`', '|', or '~'.
+    if (character == 0x5e000000 || character == 0x60000000 || character == 0x7c000000 || character == 0x7e000000) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_symbol_
+
+#ifndef _di_f_utf_character_is_unassigned_
+  f_status_t f_utf_character_is_unassigned(const f_utf_character_t character) {
+
+    // ASCII values are never reported as unassigned.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_unassigned(character);
+  }
+#endif // _di_f_utf_character_is_unassigned_
+
+#ifndef _di_f_utf_character_is_valid_
+  f_status_t f_utf_character_is_valid(const f_utf_character_t character) {
+
+    // ASCII values are always reported as valid.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_true;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_valid(character);
+  }
+#endif // _di_f_utf_character_is_valid_
+
+#ifndef _di_f_utf_character_is_whitespace_
+  f_status_t f_utf_character_is_whitespace(const f_utf_character_t character) {
+
+    // A zero width check means the value is handled as ASCII, so isspace() decides.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return isspace(macro_f_utf_character_t_to_char_1(character)) ? F_true : F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_whitespace(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_
+
+#ifndef _di_f_utf_character_is_whitespace_modifier_
+  f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character) {
+
+    // There are no ASCII whitespace modifiers.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_whitespace_modifier(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_modifier_
+
+#ifndef _di_f_utf_character_is_whitespace_other_
+  f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character) {
+
+    // There are no ASCII whitespace other.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_whitespace_other(character);
+  }
+#endif // _di_f_utf_character_is_whitespace_other_
+
+#ifndef _di_f_utf_character_is_wide_
+  f_status_t f_utf_character_is_wide(const f_utf_character_t character) {
+
+    // There are no wide ASCII characters.
+    if (!macro_f_utf_character_t_width_is(character)) {
+      return F_false;
+    }
+
+    // A width of 1 is an incomplete UTF-8 fragment.
+    if (macro_f_utf_character_t_width_is(character) == 1) {
+      return F_status_set_error(F_utf_fragment);
+    }
+
+    return private_f_utf_character_is_wide(character);
+  }
+#endif // _di_f_utf_character_is_wide_
+
+#ifndef _di_f_utf_character_is_word_
+  f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      // The strict flag only affects the non-ASCII check and is passed through unchanged.
+      return private_f_utf_character_is_word(character, strict);
+    }
+
+    // The ASCII code is held in the first byte of the character, so extract it before comparing against single-byte constants.
+    const uint8_t ascii = macro_f_utf_character_t_to_char_1(character);
+
+    if (isalnum(ascii) || ascii == f_string_ascii_underscore_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_
+
+#ifndef _di_f_utf_character_is_word_dash_
+  f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      // The strict flag only affects the non-ASCII check and is passed through unchanged.
+      return private_f_utf_character_is_word_dash(character, strict);
+    }
+
+    // The ASCII code is held in the first byte of the character, so extract it before comparing against single-byte constants.
+    const uint8_t ascii = macro_f_utf_character_t_to_char_1(character);
+
+    if (isalnum(ascii) || ascii == f_string_ascii_underscore_s.string[0] || ascii == f_string_ascii_minus_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_dash_
+
+#ifndef _di_f_utf_character_is_word_dash_plus_
+  f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      // The strict flag only affects the non-ASCII check and is passed through unchanged.
+      return private_f_utf_character_is_word_dash_plus(character, strict);
+    }
+
+    // The ASCII code is held in the first byte of the character, so extract it before comparing against single-byte constants.
+    const uint8_t ascii = macro_f_utf_character_t_to_char_1(character);
+
+    if (isalnum(ascii) || ascii == f_string_ascii_underscore_s.string[0] || ascii == f_string_ascii_minus_s.string[0] || ascii == f_string_ascii_plus_s.string[0]) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_word_dash_plus_
+
+#ifndef _di_f_utf_character_is_zero_width_
+  f_status_t f_utf_character_is_zero_width(const f_utf_character_t character) {
+
+    if (macro_f_utf_character_t_width_is(character)) {
+
+      // A width of 1 is an incomplete UTF-8 fragment.
+      if (macro_f_utf_character_t_width_is(character) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_zero_width(character);
+    }
+
+    const uint8_t code = macro_f_utf_character_t_to_char_1(character);
+
+    // These control characters are considered zero-width spaces: 0x00 to 0x08, 0x0a, 0x0c to 0x1f, and 0x7f.
+    // The value is unsigned, so only the upper bound of the first range needs to be tested.
+    if (code <= 0x08 || code == 0x0a || (code >= 0x0c && code <= 0x1f) || code == 0x7f) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/c/utf/is_character.h b/level_0/f_utf/c/utf/is_character.h

new file mode 100644 (file)

index 0000000..a56870d
--- /dev/null
+++ b/level_0/f_utf/c/utf/is_character.h
@@ -0,0 +1,660 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Defines UTF-8 "character_is" functions.
+ *
+ * This is auto-included by utf.h and should not need to be explicitly included.
+ */
+#ifndef _F_utf_is_character_h
+#define _F_utf_is_character_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Check to see if the entire byte block of the character is a non-ASCII UTF-8 character.
+ *
+ * This does not validate if the UTF-8 character is a valid UTF-8 character, for that use f_utf_character_is_valid().
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 character.
+ *   F_false if not a UTF-8 character.
+ *   F_utf_fragment if this is a UTF-8 character fragment.
+ *
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_is_
+  extern f_status_t f_utf_character_is(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 alphabet character.
+ *   F_false if not a UTF-8 alphabet character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalpha()
+ */
+#ifndef _di_f_utf_character_is_alpha_
+  extern f_status_t f_utf_character_is_alpha(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 alpha-digit character.
+ *   F_false if not a UTF-8 alpha-digit character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_alpha_digit_
+  extern f_status_t f_utf_character_is_alpha_digit(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 alpha-numeric character.
+ *   F_false if not a UTF-8 alpha-numeric character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_alpha_numeric_
+  extern f_status_t f_utf_character_is_alpha_numeric(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_alpha_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII character.
+ *
+ * This does not validate whether the UTF-8 character is valid or not.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if an ASCII character.
+ *   F_false if not an ASCII character.
+ */
+#ifndef _di_f_utf_character_is_ascii_
+  extern f_status_t f_utf_character_is_ascii(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_ascii_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 combining character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 combining character.
+ *   F_false if not a UTF-8 combining character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_combining_
+  extern f_status_t f_utf_character_is_combining(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_combining_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
+ *
+ * This includes control code and control format characters.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 control character.
+ *   F_false if not a UTF-8 control character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_
+  extern f_status_t f_utf_character_is_control(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 control code character.
+ *   F_false if not a UTF-8 control code character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_code_
+  extern f_status_t f_utf_character_is_control_code(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 control format character.
+ *   F_false if not a UTF-8 control format character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_control_format_
+  extern f_status_t f_utf_character_is_control_format(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_format_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
+ *
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 control picture character.
+ *   F_false if not a UTF-8 control picture character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_control_picture_
+  extern f_status_t f_utf_character_is_control_picture(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_picture_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
+ *
+ * Digit characters are decimal digits and letter numbers.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 digit character.
+ *   F_false if not a UTF-8 digit character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_digit_
+  extern f_status_t f_utf_character_is_digit(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_digit_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 emoji character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 emoji character.
+ *   F_false if not a UTF-8 emoji character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_emoji_
+  extern f_status_t f_utf_character_is_emoji(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_emoji_
+
+/**
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
+ *
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
+ *
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
+ *
+ * According to rfc3629, the valid octet sequences for UTF-8 are:
+ *   UTF8-octets = *( UTF8-char )
+ *   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+ *   UTF8-1      = %x00-7F
+ *   UTF8-2      = %xC2-DF UTF8-tail
+ *   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+ *                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+ *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+ *                 %xF4 %x80-8F 2( UTF8-tail )
+ *   UTF8-tail   = %x80-BF
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 character fragment.
+ *   F_false if not a UTF-8 character fragment.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_valid()
+ */
+#ifndef _di_f_utf_character_is_fragment_
+  extern f_status_t f_utf_character_is_fragment(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_fragment_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 graph.
+ *   F_false if not a UTF-8 graph.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isgraph()
+ */
+#ifndef _di_f_utf_character_is_graph_
+  extern f_status_t f_utf_character_is_graph(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_graph_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 numeric character.
+ *   F_false if not a UTF-8 numeric character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_numeric_
+  extern f_status_t f_utf_character_is_numeric(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_numeric_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 phonetic character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 phonetic character.
+ *   F_false if not a UTF-8 phonetic character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_phonetic_
+  extern f_status_t f_utf_character_is_phonetic(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_phonetic_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 private character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 private character.
+ *   F_false if not a UTF-8 private character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_private_
+  extern f_status_t f_utf_character_is_private(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_private_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 punctuation character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 punctuation character.
+ *   F_false if not a UTF-8 punctuation character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_punctuation_
+  extern f_status_t f_utf_character_is_punctuation(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_punctuation_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 symbol character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 symbol character.
+ *   F_false if not a UTF-8 symbol character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_symbol_
+  extern f_status_t f_utf_character_is_symbol(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_symbol_
+
+/**
+ * Check to see if the entire byte block of the character is an unassigned (well-formed) UTF-8 character.
+ *
+ * The Surrogates and Private Use are not considered unassigned.
+ *
+ * This does validate if the UTF-8 character is an unassigned UTF-8 character.
+ * To not do this, use f_utf_character_is().
+ *
+ * @param character
+ *   The character to check.
+ *
+ * @return
+ *   F_true if a UTF-8 unassigned character.
+ *   F_false if not a UTF-8 unassigned character.
+ *
+ *   F_utf (with error bit) if unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_fragment()
+ */
+#ifndef _di_f_utf_character_is_unassigned_
+  extern f_status_t f_utf_character_is_unassigned(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_unassigned_
+
+/**
+ * Check to see if the entire byte block of the character is a valid (well-formed) UTF-8 character.
+ *
+ * This does validate if the UTF-8 character is a valid UTF-8 character.
+ * To not do this, use f_utf_character_is().
+ *
+ * ASCII character codes are considered valid by this function.
+ *
+ * Codes U+FDD0 to U+FDEF and any character ending in FFFE or FFFF are non-characters, and are therefore invalid.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 character.
+ *   F_false if not a UTF-8 character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_fragment()
+ */
+#ifndef _di_f_utf_character_is_valid_
+  extern f_status_t f_utf_character_is_valid(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_valid_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
+ *
+ * Non-printing or zero-width characters are not considered whitespace.
+ * This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ * This does not include non-true whitespace characters, such as Ogham Space Mark (U+1680).
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not whitespace.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 whitespace.
+ *   F_false if not a UTF-8 whitespace.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_character_is_whitespace_
+  extern f_status_t f_utf_character_is_whitespace(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 whitespace modifier character.
+ *
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 whitespace modifier character.
+ *   F_false if not a UTF-8 whitespace modifier character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_whitespace_modifier_
+  extern f_status_t f_utf_character_is_whitespace_modifier(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_modifier_
+
+/**
+ * Check to see if the entire byte block of the character is another type of UTF-8 space character.
+ *
+ * These are characters that are not actual whitespace (because they are graph characters) but are considered whitespace, such as Ogham Space Mark (U+1680).
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 (other) whitespace.
+ *   F_false if not a UTF-8 (other) whitespace.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isspace()
+ */
+#ifndef _di_f_utf_character_is_whitespace_other_
+  extern f_status_t f_utf_character_is_whitespace_other(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_whitespace_other_
+
+/**
+ * Get whether or not the UTF-8 character is a wide character on display.
+ *
+ * This is not wide as in the width in bytes that the code point takes up in UTF-8.
+ * Instead, this is the width in characters on the screen that the character takes up.
+ * "Wide" characters take up 2 characters when rendered.
+ * "Narrow" characters take up 1 character when rendered.
+ *
+ * @param character
+ *   The (UTF-8) character.
+ *
+ * @return
+ *   F_none on success.
+ *
+ *   F_failure (with error bit) if width is not long enough to convert.
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * NOTE(review): The other is_*() checks in this header document F_true / F_false results; confirm against the implementation whether F_none, F_failure, and F_parameter are actually returned here.
+ */
+#ifndef _di_f_utf_character_is_wide_
+  extern f_status_t f_utf_character_is_wide(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_wide_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-numeric or an underscore '_'.
+ *
+ * @param character
+ *   The character to validate.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word character.
+ *   F_false if not a UTF-8 word character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_
+  extern f_status_t f_utf_character_is_word(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * @param character
+ *   The character to validate.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word or dash character.
+ *   F_false if not a UTF-8 word or dash character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_dash_
+  extern f_status_t f_utf_character_is_word_dash(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_dash_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
+ *
+ * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ *
+ * Unicode appears to refer to dashes that connect words as a hyphen.
+ * Therefore, only these hyphens are considered dashes for the purposes of this function.
+ * All other dash-like Unicode characters are not considered a dash here.
+ * The dash here is intended for combining words, which matches the context of the Unicode "hyphen".
+ *
+ * This does not include zero-width punctuation, such as "invisible plus" (U+2064) (even in strict mode).
+ *
+ * @param character
+ *   The character to validate.
+ * @param strict
+ *   When TRUE, include all appropriate characters by type as per Unicode.
+ *   When FALSE, non-inline punctuation connectors are not considered a character (such as U+FE33 '︳').
+ *   When FALSE, zero-width punctuation characters are not considered a character.
+ *
+ * @return
+ *   F_true if a UTF-8 word, dash, or plus character.
+ *   F_false if not a UTF-8 word, dash, or plus character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ */
+#ifndef _di_f_utf_character_is_word_dash_plus_
+  extern f_status_t f_utf_character_is_word_dash_plus(const f_utf_character_t character, const bool strict);
+#endif // _di_f_utf_character_is_word_dash_plus_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
+ *
+ * This only matches characters that do not print, which are generally called zero-width.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   F_true if a UTF-8 non-printing or zero-width character.
+ *   F_false if not a UTF-8 non-printing or zero-width character.
+ *
+ *   F_utf (with error bit) if character is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ */
+#ifndef _di_f_utf_character_is_zero_width_
+  extern f_status_t f_utf_character_is_zero_width(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_zero_width_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _F_utf_is_character_h
diff --git a/level_0/f_utf/data/build/settings b/level_0/f_utf/data/build/settings

index 465b699da1aa1b1c32e0f3aed407d586e94b9629..26c3e419ff957b0922a03a2445b46ff8cb49488c 100644 (file)
--- a/level_0/f_utf/data/build/settings
+++ b/level_0/f_utf/data/build/settings
@@ -31,7 +31,7 @@ build_objects_library_static
  build_objects_program
  build_objects_program_shared
  build_objects_program_static
-build_sources_library utf.c private-utf.c utf/common.c utf/dynamic.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
+build_sources_library utf.c private-utf.c utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-is_unassigned.c utf/private-string.c utf/string.c utf/triple.c
  build_sources_library_shared
  build_sources_library_static
  build_sources_object
@@ -40,7 +40,7 @@ build_sources_object_static
  build_sources_program
  build_sources_program_shared
  build_sources_program_static
-build_sources_headers utf.h utf/common.h utf/dynamic.h utf/map.h utf/string.h utf/triple.h
+build_sources_headers utf.h utf/common.h utf/convert.h utf/dynamic.h utf/is.h utf/is_character.h utf/map.h utf/string.h utf/triple.h
  build_sources_headers_shared
  build_sources_headers_static
  build_sources_script
author	Kevin Day <thekevinday@gmail.com>
	Mon, 28 Mar 2022 00:56:35 +0000 (19:56 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Mon, 28 Mar 2022 01:45:41 +0000 (20:45 -0500)
build/level_0/settings		patch \| blob \| history
build/monolithic/settings		patch \| blob \| history
level_0/f_utf/c/utf.c		patch \| blob \| history
level_0/f_utf/c/utf.h		patch \| blob \| history
level_0/f_utf/c/utf/common.h		patch \| blob \| history
level_0/f_utf/c/utf/convert.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/utf/convert.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/utf/dynamic.h		patch \| blob \| history
level_0/f_utf/c/utf/is.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/utf/is.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/utf/is_character.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/utf/is_character.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/data/build/settings		patch \| blob \| history