Update: The f_utf project regarding digits and perform other clean ups.

author Kevin Day <thekevinday@gmail.com>

Mon, 20 Jun 2022 04:42:18 +0000 (23:42 -0500)

committer Kevin Day <thekevinday@gmail.com>

Mon, 20 Jun 2022 05:04:15 +0000 (00:04 -0500)
author Kevin Day <thekevinday@gmail.com>
Mon, 20 Jun 2022 04:42:18 +0000 (23:42 -0500)
committer Kevin Day <thekevinday@gmail.com>
Mon, 20 Jun 2022 05:04:15 +0000 (00:04 -0500)
diff --git a/build/level_0/settings b/build/level_0/settings

index 6984decdaef73db28e02926f126988ddeecff807..e86fdf928709d86d8ae608c2e2a6e5b1bae0c88c 100644 (file)
--- a/build/level_0/settings
+++ b/build/level_0/settings
@@ -43,7 +43,7 @@ build_sources_library status_string.c
  build_sources_library string.c private-string.c string/common.c string/dynamic.c string/map.c string/map_multi.c string/private-dynamic.c string/private-map.c string/private-map_multi.c string/private-quantity.c string/private-range.c string/private-triple.c string/quantity.c string/range.c string/static.c string/triple.c
  build_sources_library type_array/array_length.c type_array/cell.c type_array/fll_id.c type_array/int8.c type_array/int16.c type_array/int32.c type_array/int64.c type_array/int128.c type_array/state.c type_array/status.c type_array/uint8.c type_array/uint16.c type_array/uint32.c type_array/uint64.c type_array/uint128.c
  build_sources_library type_array/private-array_length.c type_array/private-cell.c type_array/private-fll_id.c type_array/private-int8.c type_array/private-int16.c type_array/private-int32.c type_array/private-int64.c type_array/private-int128.c type_array/private-state.c type_array/private-status.c type_array/private-uint8.c type_array/private-uint16.c type_array/private-uint32.c type_array/private-uint64.c type_array/private-uint128.c
-build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
+build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
  build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-triple.c utf/private-is_unassigned.c utf/private-string.c utf/static.c utf/string.c utf/triple.c
  build_sources_library-level thread.c private-thread.c
  build_sources_library_shared
diff --git a/build/monolithic/settings b/build/monolithic/settings

index c65254519dcd5a19d771db0b60d5be04465c3bd4..9a3e1762dbd03a4da073c1d295d4d5f336a6dec7 100644 (file)
--- a/build/monolithic/settings
+++ b/build/monolithic/settings
@@ -43,7 +43,7 @@ build_sources_library level_0/status_string.c
  build_sources_library level_0/string.c level_0/private-string.c level_0/string/common.c level_0/string/dynamic.c level_0/string/map.c level_0/string/map_multi.c level_0/string/private-dynamic.c level_0/string/private-map.c level_0/string/private-map_multi.c level_0/string/private-quantity.c level_0/string/private-range.c level_0/string/private-triple.c level_0/string/quantity.c level_0/string/range.c level_0/string/static.c level_0/string/triple.c
  build_sources_library level_0/type_array/array_length.c level_0/type_array/cell.c level_0/type_array/fll_id.c level_0/type_array/int8.c level_0/type_array/int16.c level_0/type_array/int32.c level_0/type_array/int64.c level_0/type_array/int128.c level_0/type_array/state.c level_0/type_array/status.c level_0/type_array/uint8.c level_0/type_array/uint16.c level_0/type_array/uint32.c level_0/type_array/uint64.c level_0/type_array/uint128.c
  build_sources_library level_0/type_array/private-array_length.c level_0/type_array/private-cell.c level_0/type_array/private-fll_id.c level_0/type_array/private-int8.c level_0/type_array/private-int16.c level_0/type_array/private-int32.c level_0/type_array/private-int64.c level_0/type_array/private-int128.c level_0/type_array/private-state.c level_0/type_array/private-status.c level_0/type_array/private-uint8.c level_0/type_array/private-uint16.c level_0/type_array/private-uint32.c level_0/type_array/private-uint64.c level_0/type_array/private-uint128.c
-build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c
+build_sources_library level_0/utf.c level_0/private-utf.c level_0/private-utf_alphabetic.c level_0/private-utf_combining.c level_0/private-utf_control.c level_0/private-utf_decimal.c level_0/private-utf_digit.c level_0/private-utf_emoji.c level_0/private-utf_numeric.c level_0/private-utf_phonetic.c level_0/private-utf_private.c level_0/private-utf_punctuation.c level_0/private-utf_subscript.c level_0/private-utf_superscript.c level_0/private-utf_symbol.c level_0/private-utf_valid.c level_0/private-utf_whitespace.c level_0/private-utf_wide.c level_0/private-utf_word.c level_0/private-utf_zero_width.c
  build_sources_library level_0/utf/common.c level_0/utf/convert.c level_0/utf/dynamic.c level_0/utf/is.c level_0/utf/is_character.c level_0/utf/map.c level_0/utf/map_multi.c level_0/utf/static.c level_0/utf/string.c level_0/utf/triple.c level_0/utf/private-dynamic.c level_0/utf/private-is_unassigned.c level_0/utf/private-map.c level_0/utf/private-map_multi.c level_0/utf/private-triple.c level_0/utf/private-string.c
  
  build_sources_library level_1/control_group.c
diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c

index 32457896044fd0198a618b72c823db6d5b0ac103..4d95c039980bd76c173734503ab30006b3227532 100644 (file)
--- a/level_0/f_utf/c/private-utf.c
+++ b/level_0/f_utf/c/private-utf.c
@@ -6,7 +6,7 @@
  extern "C" {
  #endif
  
-#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
    f_status_t private_f_utf_char_to_character(const f_string_t sequence, const f_array_length_t width_max, f_utf_char_t *character_utf) {
  
      if (!macro_f_utf_byte_width_is(*sequence)) {
@@ -45,7 +45,7 @@ extern "C" {
  
      return F_none;
    }
-#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
  
  #if !defined(_di_f_utf_unicode_to_) || !defined(_di_f_utf_character_unicode_to_)
    f_status_t private_f_utf_character_unicode_to(const f_utf_char_t sequence, uint32_t *codepoint) {
diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h

index 60ef262c9d4458c9270b1b8dcf135224aff13da0..48394205192c6cd0778537411bec54c7c91e911e 100644 (file)
--- a/level_0/f_utf/c/private-utf.h
+++ b/level_0/f_utf/c/private-utf.h
@@ -44,12 +44,13 @@ extern "C" {
   * @see f_utf_character_is_valid()
   * @see f_utf_is_valid()
   * @see f_utf_is_alphabetic()
- * @see f_utf_is_alphabetic_digit()
+ * @see f_utf_is_alphabetic_decimal()
   * @see f_utf_is_alphabetic_numeric()
   * @see f_utf_is_ascii()
   * @see f_utf_is_combining()
   * @see f_utf_is_control()
   * @see f_utf_is_control_picture()
+ * @see f_utf_is_decimal()
   * @see f_utf_is_digit()
   * @see f_utf_is_emoji()
   * @see f_utf_is_graph()
@@ -70,9 +71,9 @@ extern "C" {
   * @see f_utf_is_zero_width()
   * @see f_utf_unicode_to()
   */
-#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#if !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
    extern f_status_t private_f_utf_char_to_character(const f_string_t character, const f_array_length_t width_max, f_utf_char_t *character_utf) F_attribute_visibility_internal_d;
-#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
+#endif // !defined(_di_f_utf_char_to_character_) || !defined(_di_f_utf_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_numeric_) || !defined(_di_f_utf_is_ascii_) || !defined(_di_f_utf_is_combining_) || !defined(_di_f_utf_is_control_) || !defined(_di_f_utf_is_control_picture_) || !defined(_di_f_utf_is_decimal_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_emoji_) || !defined(_di_f_utf_is_graph_) || !defined(_di_f_utf_is_numeric_) || !defined(_di_f_utf_is_phonetic_) || !defined(_di_f_utf_is_private_) || !defined(_di_f_utf_is_punctuation_) || !defined(_di_f_utf_is_symbol_) || !defined(_di_f_utf_is_unassigned_) || !defined(_di_f_utf_is_valid_) || !defined(_di_f_utf_is_whitespace_) || !defined(_di_f_utf_is_whitespace_modifier_) || !defined(_di_f_utf_is_whitespace_other_) || !defined(_di_f_utf_is_wide_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_) || !defined(_di_f_utf_is_zero_width_) || !defined(f_utf_unicode_to)
  
  /**
   * Private implementation of f_utf_character_is_zero_width().
diff --git a/level_0/f_utf/c/private-utf_alphabetic.c b/level_0/f_utf/c/private-utf_alphabetic.c

index a35c162890575d5e1d99d0786915c31e0fb35f36..83d6dfdd261c689b92f4d0adc4530b53e792288e 100644 (file)
--- a/level_0/f_utf/c/private-utf_alphabetic.c
+++ b/level_0/f_utf/c/private-utf_alphabetic.c
@@ -3,6 +3,7 @@
  #include "private-utf_alphabetic.h"
  #include "private-utf_combining.h"
  #include "private-utf_control.h"
+#include "private-utf_decimal.h"
  #include "private-utf_digit.h"
  #include "private-utf_numeric.h"
  #include "private-utf_phonetic.h"
@@ -63,6 +64,54 @@ extern "C" {
    }
  #endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_)
  
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+  f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+
+    if (private_f_utf_character_is_decimal(sequence, F_true, value)) { // Decimal test runs first; value is forwarded untouched and any non-zero status yields F_true.
+      return F_true;
+    }
+
+    if (private_f_utf_character_is_zero_width(sequence)) {
+      return F_false;
+    }
+
+    // The is_control() handles both is_control_code() and is_control_format().
+    if (private_f_utf_character_is_control(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_control_picture(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_whitespace(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_whitespace_modifier(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_numeric(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_punctuation(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_symbol(sequence)) {
+      return F_false;
+    }
+
+    if (private_f_utf_character_is_phonetic(sequence)) {
+      return F_false;
+    }
+
+    return F_true; // Reached only when none of the exclusion tests above matched.
+  }
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+
  #if !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_)
    f_status_t private_f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) {
  
@@ -107,7 +156,7 @@ extern "C" {
        return F_false;
      }
  
-    return F_false;
+    return F_true;
    }
  #endif // !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_)
  
@@ -151,7 +200,7 @@ extern "C" {
        return F_false;
      }
  
-    return F_false;
+    return F_true;
    }
  #endif // !defined(_di_f_utf_character_is_alphabetic_numeric_) || !defined(_di_f_utf_is_alphabetic_numeric_)
  
diff --git a/level_0/f_utf/c/private-utf_alphabetic.h b/level_0/f_utf/c/private-utf_alphabetic.h

index 283142a7564510c46fb64c5854261659a401c322..4cb70851f28285c0c46c9a46483aecf3b6803a42 100644 (file)
--- a/level_0/f_utf/c/private-utf_alphabetic.h
+++ b/level_0/f_utf/c/private-utf_alphabetic.h
@@ -42,6 +42,38 @@ extern "C" {
  #endif // !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_)
  
  /**
+ * Private implementation of f_utf_character_is_alphabetic_decimal().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * This expects the character width to be of at least size 2.
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if a UTF-8 alphabetic or a decimal character.
+ *   F_false if not a UTF-8 alphabetic nor a decimal character.
+ *
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see f_utf_character_is_alphabetic_decimal()
+ * @see f_utf_is_alphabetic_decimal()
+ */
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+  extern f_status_t private_f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_)
+
+/**
   * Private implementation of f_utf_character_is_alphabetic_digit().
   *
   * Intended to be shared to each of the different implementation variations.
diff --git a/level_0/f_utf/c/private-utf_decimal.c b/level_0/f_utf/c/private-utf_decimal.c

new file mode 100644 (file)

index 0000000..5f7d1ae
--- /dev/null
+++ b/level_0/f_utf/c/private-utf_decimal.c
@@ -0,0 +1,913 @@
+#include "utf.h"
+#include "private-utf.h"
+#include "private-utf_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Inline helper function to reduce the amount of repeated code.
+ *
+ * When value is non-NULL, the macro_f_utf_char_t_to_char_N() macro matching the width N of sequence is applied to (sequence - start), 0x30 (ASCII '0') is added, and the result is handed to private_f_utf_character_is_decimal_for_ascii().
+ *
+ * This does not handle non-decimal values (non-base-10).
+ *
+ * @param sequence
+ *   The character sequence to process (widths 2, 3, and 4 are handled).
+ * @param always
+ *   Passed through unchanged to private_f_utf_character_is_decimal_for_ascii().
+ * @param start
+ *   The inclusive start of the range; a sequence equal to start is mapped to ASCII '0'.
+ *   No range check is performed here; the callers in this file test the range before calling.
+ * @param value
+ *   The value to update, if non-NULL.
+ *
+ * @return
+ *   F_true if value is NULL.
+ *   Otherwise, the status returned by private_f_utf_character_is_decimal_for_ascii().
+ */
+static inline f_status_t private_inline_f_utf_character_handle_decimal(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) {
+
+  if (value) {
+    f_char_t ascii = 0; // Stays 0 when the width is not 2, 3, or 4.
+
+    if (macro_f_utf_char_t_width(sequence) == 2) {
+      ascii = (f_char_t) macro_f_utf_char_t_to_char_2(sequence - start);
+    }
+    else if (macro_f_utf_char_t_width(sequence) == 3) {
+      ascii = (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start);
+    }
+    else if (macro_f_utf_char_t_width(sequence) == 4) {
+      ascii = (f_char_t) macro_f_utf_char_t_to_char_4(sequence - start);
+    }
+
+    ascii += 0x30; // 0x30 is ASCII '0'.
+
+    return private_f_utf_character_is_decimal_for_ascii(ascii, always, value);
+  }
+
+  return F_true; // No value requested, so there is nothing to convert.
+}
+
+/**
+ * Inline helper function to reduce the amount of repeated code.
+ *
+ * When value is non-NULL, macro_f_utf_char_t_to_char_3() is applied to (sequence - start), 0x31 (ASCII '1') is added, and the result is handed to private_f_utf_character_is_decimal_for_ascii().
+ *
+ * This does not handle non-decimal values (non-base-10).
+ *
+ * @param sequence
+ *   The character sequence to process (only the width 3 macro is applied; the callers in this file are inside the width 3 branch).
+ * @param always
+ *   Passed through unchanged to private_f_utf_character_is_decimal_for_ascii().
+ * @param start
+ *   The inclusive start of the range; a sequence equal to start is mapped to ASCII '1'.
+ *   No range check is performed here; the callers in this file test the range before calling.
+ * @param value
+ *   The value to update, if non-NULL.
+ *
+ * @return
+ *   F_true if value is NULL.
+ *   Otherwise, the status returned by private_f_utf_character_is_decimal_for_ascii().
+ */
+static inline f_status_t private_inline_f_utf_character_handle_roman_numeral(const f_utf_char_t sequence, const bool always, const f_utf_char_t start, uint32_t * const value) {
+
+  if (value) {
+    const f_char_t ascii = ((f_char_t) macro_f_utf_char_t_to_char_3(sequence - start)) + 0x31; // 0x31 is ASCII '1'.
+
+    return private_f_utf_character_is_decimal_for_ascii(ascii, always, value);
+  }
+
+  return F_true; // No value requested, so there is nothing to convert.
+}
+
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+  f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) {
+
+    if (macro_f_utf_char_t_width_is(sequence) == 2) {
+
+      // Arabic: U+0660 to U+0669.
+      if (sequence >= 0xd9a00000 && sequence <= 0xd9a90000) {
+        return private_inline_f_utf_character_handle_decimal(sequence, always, 0xd9a00000, value);
+      }
+
+      // Extended Arabic: U+06F0 to U+06F9.
+      if (sequence >= 0xdbb00000 && sequence <= 0xdbb90000) {
+        return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdbb00000, value);
+      }
+
+      // NKo: U+07C0 to U+07C9.
+      if (sequence >= 0xdf800000 && sequence <= 0xdf890000) {
+        return private_inline_f_utf_character_handle_decimal(sequence, always, 0xdf800000, value);
+      }
+
+      return F_false;
+    }
+
+    if (macro_f_utf_char_t_width_is(sequence) == 3) {
+
+      if (macro_f_utf_char_t_to_char_1(sequence) == 0xe0) {
+
+        // Devanagari: U+0966 to U+096F.
+        if (sequence >= 0xe0a5a600 && sequence <= 0xe0a5af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a5a600, value);
+        }
+
+        // Bengali: U+09E6 to U+09EF.
+        if (sequence >= 0xe0a7a600 && sequence <= 0xe0a7af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a7a600, value);
+        }
+
+        // Gurmukhi: U+0A66 to U+0A6F.
+        if (sequence >= 0xe0a9a600 && sequence <= 0xe0a9af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0a9a600, value);
+        }
+
+        // Gujarati: U+0AE6 to U+0AEF.
+        if (sequence >= 0xe0aba600 && sequence <= 0xe0abaf00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0aba600, value);
+        }
+
+        // Oriya: U+0B66 to U+0B6F.
+        if (sequence >= 0xe0ada600 && sequence <= 0xe0adaf00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0ada600, value);
+        }
+
+        // Tamil: U+0BE6 to U+0BEF.
+        if (sequence >= 0xe0afa600 && sequence <= 0xe0afaf00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0afa600, value);
+        }
+
+        // Telugu: U+0C66 to U+0C6F.
+        if (sequence >= 0xe0b1a600 && sequence <= 0xe0b1af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b1a600, value);
+        }
+
+        // Kannada: U+0CE6 to U+0CEF.
+        if (sequence >= 0xe0b3a600 && sequence <= 0xe0b3af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b3a600, value);
+        }
+
+        // Malayalam: U+0D66 to U+0D6F.
+        if (sequence >= 0xe0b5a600 && sequence <= 0xe0b5af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b5a600, value);
+        }
+
+        // Sinhala: U+0DE6 to U+0DEF.
+        if (sequence >= 0xe0b7a600 && sequence <= 0xe0b7af00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b7a600, value);
+        }
+
+        // Thai: U+0E50 to U+0E59.
+        if (sequence >= 0xe0b99000 && sequence <= 0xe0b99900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0b99000, value);
+        }
+
+        // Lao: U+0ED0 to U+0ED9.
+        if (sequence >= 0xe0bb9000 && sequence <= 0xe0bb9900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bb9000, value);
+        }
+
+        // Tibetan: U+0F20 to U+0F29.
+        if (sequence >= 0xe0bca000 && sequence <= 0xe0bca900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe0bca000, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe1) {
+
+        // Myanmar: U+1040 to U+1049.
+        if (sequence >= 0xe1818000 && sequence <= 0xe1818900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1818000, value);
+        }
+
+        // Myanmar (Shan): U+1090 to U+1099.
+        if (sequence >= 0xe1829000 && sequence <= 0xe1829900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1829000, value);
+        }
+
+        // Khmer: U+17E0 to U+17E9.
+        if (sequence >= 0xe19fa000 && sequence <= 0xe19fa900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe19fa000, value);
+        }
+
+        // Mongolian: U+1810 to U+1819.
+        if (sequence >= 0xe1a09000 && sequence <= 0xe1a09900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a09000, value);
+        }
+
+        // Limbu: U+1946 to U+194F.
+        if (sequence >= 0xe1a58600 && sequence <= 0xe1a58f00) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a58600, value);
+        }
+
+        // New Tai Lue: U+19D0 to U+19D9.
+        if (sequence >= 0xe1a79000 && sequence <= 0xe1a79900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1a79000, value);
+        }
+
+        // Tai Tham (Hora): U+1A80 to U+1A89.
+        if (sequence >= 0xe1aa8000 && sequence <= 0xe1aa8900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa8000, value);
+        }
+
+        // Tai Tham (Tham): U+1A90 to U+1A99.
+        if (sequence >= 0xe1aa9000 && sequence <= 0xe1aa9900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aa9000, value);
+        }
+
+        // Balinese: U+1B50 to U+1B59.
+        if (sequence >= 0xe1ad9000 && sequence <= 0xe1ad9900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1ad9000, value);
+        }
+
+        // Sundanese: U+1BB0 to U+1BB9.
+        if (sequence >= 0xe1aeb000 && sequence <= 0xe1aeb900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1aeb000, value);
+        }
+
+        // Lepcha: U+1C40 to U+1C49.
+        if (sequence >= 0xe1b18000 && sequence <= 0xe1b18900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b18000, value);
+        }
+
+        // Ol Chiki: U+1C50 to U+1C59.
+        if (sequence >= 0xe1b19000 && sequence <= 0xe1b19900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xe1b19000, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
+
+        // Number Forms (Roman Numerals): U+2160 to U+2188.
+        if (sequence >= 0xe285a000 && sequence <= 0xe2868800) {
+
+          // Roman Numerals (large) for 1-9: U+2160 to U+2168.
+          if (sequence >= 0xe285a000 && sequence <= 0xe285a800) {
+            return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285a000, value);
+          }
+
+          // Roman Numerals (small) for 1-9: U+2170 to U+2178.
+          if (sequence >= 0xe285b000 && sequence <= 0xe285b800) {
+            return private_inline_f_utf_character_handle_roman_numeral(sequence, always, 0xe285b000, value);
+          }
+
+          // Roman Numeral (late form) for 6: U+2185.
+          if (!value || *value > 5) {
+            if (sequence == 0xe2868500) {
+              *value = 6;
+
+              return F_true;
+            }
+          }
+
+          if (value) {
+            if (*value > 9) {
+
+              // Roman Numeral: U+2169, U+2179.
+              if (sequence == 0xe285a900 || sequence == 0xe285b900) {
+                *value = 10;
+
+                return F_true;
+              }
+
+              if (*value > 10) {
+
+                // Roman Numeral: U+216A, U+217A.
+                if (sequence == 0xe285aa00 || sequence == 0xe285ba00) {
+                  *value = 11;
+
+                  return F_true;
+                }
+
+                if (*value > 11) {
+
+                  // Roman Numeral: U+216B, U+217B.
+                  if (sequence == 0xe285ab00 || sequence == 0xe285bb00) {
+                    *value = 12;
+
+                    return F_true;
+                  }
+
+                  // All remaining are out of the range 0-16 and value must be set to 0xffffffff (F_type_size_max_32_unsigned_d) for them to be processed.
+                  if (*value == F_type_size_max_32_unsigned_d) {
+
+                    // Roman Numeral: U+216C, U+217C, U+2186.
+                    if (sequence == 0xe285ac00 || sequence == 0xe285bc00 || sequence == 0xe2868600) {
+                      *value = 50;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+216D, U+217D, U+2183 (reversed, large), U+2184 (reversed, small).
+                    if (sequence == 0xe285ad00 || sequence == 0xe285bd00 || sequence == 0xe2868300 || sequence == 0xe2868400) {
+                      *value = 100;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+216E, U+217E.
+                    if (sequence == 0xe285ae00 || sequence == 0xe285be00) {
+                      *value = 500;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+216F, U+217F, U+2180 (1000 "CD").
+                    if (sequence == 0xe285af00 || sequence == 0xe285bf00 || sequence == 0xe2868000) {
+                      *value = 1000;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+2181.
+                    if (sequence == 0xe2868100) {
+                      *value = 5000;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+2182.
+                    if (sequence == 0xe2868200) {
+                      *value = 10000;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+2187.
+                    if (sequence == 0xe2868700) {
+                      *value = 50000;
+
+                      return F_true;
+                    }
+
+                    // Roman Numeral: U+2188.
+                    if (sequence == 0xe2868800) {
+                      *value = 100000;
+
+                      return F_true;
+                    }
+                  }
+                }
+              }
+            }
+
+            *value = F_type_size_max_32_unsigned_d;
+          }
+
+          if (always) {
+            return F_true;
+          }
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) {
+
+        // Vai: U+A620 to U+A629.
+        if (sequence >= 0xea98a000 && sequence <= 0xea98a900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xea98a000, value);
+        }
+
+        // Saurashtra: U+A8D0 to U+A8D9.
+        if (sequence >= 0xeaa39000 && sequence <= 0xeaa39900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa39000, value);
+        }
+
+        // Kayah Li: U+A900 to U+A909.
+        if (sequence >= 0xeaa48000 && sequence <= 0xeaa48900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa48000, value);
+        }
+
+        // Javanese: U+A9D0 to U+A9D9.
+        if (sequence >= 0xeaa79000 && sequence <= 0xeaa79900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa79000, value);
+        }
+
+        // Myanmar Extended-B: U+A9F0 to U+A9F9.
+        if (sequence >= 0xeaa7b000 && sequence <= 0xeaa7b900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa7b000, value);
+        }
+
+        // Cham: U+AA50 to U+AA59.
+        if (sequence >= 0xeaa99000 && sequence <= 0xeaa99900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaa99000, value);
+        }
+
+        // Meetei Mayek: U+ABF0 to U+ABF9.
+        if (sequence >= 0xeaafb000 && sequence <= 0xeaafb900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xeaafb000, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_1(sequence) == 0xef) {
+
+        // Halfwidth and Fullwidth Forms: U+FF10 to U+FF19.
+        if (sequence >= 0xefbc9000 && sequence <= 0xefbc9900) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xefbc9000, value);
+        }
+      }
+
+      return F_false;
+    }
+
+    if (macro_f_utf_char_t_to_char_1(sequence) == 0xf0) {
+
+      if (macro_f_utf_char_t_to_char_2(sequence) == 0x90) {
+
+        // Osmanya: U+104A0 to U+104A9.
+        if (sequence >= 0xf09092a0 && sequence <= 0xf09092a9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09092a0, value);
+        }
+
+        // Hanifi Rohingya: U+10D30 to U+10D39.
+        if (sequence >= 0xf090b4b0 && sequence <= 0xf090b4b9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf090b4b0, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_2(sequence) == 0x91) {
+
+        // Brahmi: U+11066 to U+1106F.
+        if (sequence >= 0xf09181a6 && sequence <= 0xf09181af) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09181a6, value);
+        }
+
+        // Sora Sompeng: U+110F0 to U+110F9.
+        if (sequence >= 0xf09183b0 && sequence <= 0xf09183b9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09183b0, value);
+        }
+
+        // Chakma: U+11136 to U+1113F.
+        if (sequence >= 0xf09184b6 && sequence <= 0xf09184bf) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09184b6, value);
+        }
+
+        // Sharada: U+111D0 to U+111D9.
+        if (sequence >= 0xf0918790 && sequence <= 0xf0918799) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918790, value);
+        }
+
+        // Khudawadi: U+112F0 to U+112F9.
+        if (sequence >= 0xf0918bb0 && sequence <= 0xf0918bb9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0918bb0, value);
+        }
+
+        // Newa: U+11450 to U+11459.
+        if (sequence >= 0xf0919190 && sequence <= 0xf0919199) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919190, value);
+        }
+
+        // Tirhuta: U+114D0 to U+114D9.
+        if (sequence >= 0xf0919390 && sequence <= 0xf0919399) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919390, value);
+        }
+
+        // Modi: U+11650 to U+11659.
+        if (sequence >= 0xf0919990 && sequence <= 0xf0919999) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919990, value);
+        }
+
+        // Takri: U+116C0 to U+116C9.
+        if (sequence >= 0xf0919b80 && sequence <= 0xf0919b89) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919b80, value);
+        }
+
+        // Ahom: U+11730 to U+11739.
+        if (sequence >= 0xf0919cb0 && sequence <= 0xf0919cb9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf0919cb0, value);
+        }
+
+        // Warang Citi: U+118E0 to U+118E9.
+        if (sequence >= 0xf091a3a0 && sequence <= 0xf091a3a9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a3a0, value);
+        }
+
+        // Dives Akuru: U+11950 to U+11959.
+        if (sequence >= 0xf091a590 && sequence <= 0xf091a599) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091a590, value);
+        }
+
+        // Bhaiksuki: U+11C50 to U+11C59.
+        if (sequence >= 0xf091b190 && sequence <= 0xf091b199) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b190, value);
+        }
+
+        // Masaram Gondi: U+11D50 to U+11D59.
+        if (sequence >= 0xf091b590 && sequence <= 0xf091b599) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b590, value);
+        }
+
+        // Gunjala Gondi: U+11DA0 to U+11DA9.
+        if (sequence >= 0xf091b6a0 && sequence <= 0xf091b6a9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf091b6a0, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_2(sequence) == 0x96) {
+
+        // Mro: U+16A60 to U+16A69.
+        if (sequence >= 0xf096a9a0 && sequence <= 0xf096a9a9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096a9a0, value);
+        }
+
+        // Tangsa: U+16AC0 to U+16AC9.
+        if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ab80, value);
+        }
+
+        // Pahawh Hmong: U+16B50 to U+16B59.
+        if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf096ad90, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) {
+
+        // Mathematical Alphanumeric (Bold) Symbols: U+1D7CE to U+1D7D7.
+        if (sequence >= 0xf09d9f8e && sequence <= 0xf09d9f97) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f8e, value);
+        }
+
+        // Mathematical Alphanumeric (Double-Struck) Symbols: U+1D7D8 to U+1D7E1.
+        if (sequence >= 0xf09d9f98 && sequence <= 0xf09d9fa1) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9f98, value);
+        }
+
+        // Mathematical Alphanumeric (Sans-Serif) Symbols: U+1D7E2 to U+1D7EB.
+        if (sequence >= 0xf09d9fa2 && sequence <= 0xf09d9fab) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fa2, value);
+        }
+
+        // Mathematical Alphanumeric (Sans-Serif Bold) Symbols: U+1D7EC to U+1D7F5.
+        if (sequence >= 0xf09d9fac && sequence <= 0xf09d9fb5) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fac, value);
+        }
+
+        // Mathematical Alphanumeric (Monospace) Symbols: U+1D7F6 to U+1D7FF.
+        if (sequence >= 0xf09d9fb6 && sequence <= 0xf09d9fbf) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09d9fb6, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9e) {
+
+        // Nyiakeng Puachue Hmong: U+1E140 to U+1E149.
+        if (sequence >= 0xf09e8580 && sequence <= 0xf09e8589) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8580, value);
+        }
+
+        // Wancho: U+1E2F0 to U+1E2F9.
+        if (sequence >= 0xf09e8bb0 && sequence <= 0xf09e8bb9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09e8bb0, value);
+        }
+
+        // Adlam: U+1E950 to U+1E959.
+        if (sequence >= 0xf09ea590 && sequence <= 0xf09ea599) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09ea590, value);
+        }
+      }
+      else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9f) {
+
+        // Symbols for Legacy Computing (Segmented): U+1FBF0 to U+1FBF9.
+        if (sequence >= 0xf09fafb0 && sequence <= 0xf09fafb9) {
+          return private_inline_f_utf_character_handle_decimal(sequence, always, 0xf09fafb0, value);
+        }
+      }
+    }
+
+    if (value) {
+      *value = F_type_size_max_32_unsigned_d;
+    }
+
+    return F_false;
+  }
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+  f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) {
+
+    // Purpose: decide whether a single-byte character is an ASCII digit and, when value is provided, resolve its integer value against the requested base.
+    //
+    // On entry *value holds the base: 0 applies no range restriction and any other number admits only digits strictly below it.
+    // On exit *value holds the digit, or F_type_size_max_32_unsigned_d when the digit is not usable under that base.
+    //
+    // Returns F_false for a non-digit (value is left untouched), F_true for a usable digit.
+    // A digit that is out of range for the base returns F_true only when always is set, otherwise F_false.
+
+    // The <ctype.h> classifiers require an argument representable as unsigned char (or EOF), so a possibly negative f_char_t must be converted first.
+    if (!isdigit((unsigned char) character)) {
+      return F_false;
+    }
+
+    // Without value there is no base to compare against, so every digit qualifies.
+    if (!value) {
+      return F_true;
+    }
+
+    // NOTE(review): only '0' to '9' get this far, so letter digits (a-f) for bases 11 to 16 are never accepted.
+    // The previous a-f handling was nested behind the same isdigit() guard and could not execute.
+    // Supporting them requires widening the guard and deciding what a letter means when value is NULL or the base is 10 or less.
+    const f_char_t digits[] = {
+      f_string_ascii_0_s.string[0],
+      f_string_ascii_1_s.string[0],
+      f_string_ascii_2_s.string[0],
+      f_string_ascii_3_s.string[0],
+      f_string_ascii_4_s.string[0],
+      f_string_ascii_5_s.string[0],
+      f_string_ascii_6_s.string[0],
+      f_string_ascii_7_s.string[0],
+      f_string_ascii_8_s.string[0],
+      f_string_ascii_9_s.string[0],
+    };
+
+    for (uint32_t digit = 0; digit < 10; ++digit) {
+
+      if (character != digits[digit]) {
+        continue;
+      }
+
+      // Zero is accepted under every base, any other digit must be strictly below a non-zero base.
+      if (!digit || !*value || *value > digit) {
+        *value = digit;
+
+        return F_true;
+      }
+
+      break;
+    }
+
+    // The digit is out of range for the requested base (or matched no known digit): report "no known representation".
+    *value = F_type_size_max_32_unsigned_d;
+
+    if (always) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/c/private-utf_decimal.h b/level_0/f_utf/c/private-utf_decimal.h

new file mode 100644 (file)

index 0000000..5e0f181
--- /dev/null
+++ b/level_0/f_utf/c/private-utf_decimal.h
@@ -0,0 +1,91 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Provides UTF-8 capabilities.
+ *
+ * These are provided for internal reduction in redundant code.
+ * These should not be exposed/used outside of this project.
+ */
+#ifndef _PRIVATE_F_utf_decimal_h
+#define _PRIVATE_F_utf_decimal_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Private implementation of f_utf_character_is_decimal().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * This expects the character width to be of at least size 2.
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ * @param always
+ *   Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range.
+ *   When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if a UTF-8 decimal character.
+ *   F_false if not a UTF-8 decimal character.
+ *
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see f_utf_character_is_decimal()
+ * @see f_utf_is_decimal()
+ */
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+  extern f_status_t private_f_utf_character_is_decimal(const f_utf_char_t sequence, const bool always, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+/**
+ * Helper function for handling ASCII-only tests.
+ *
+ * The width is always assumed to be 1.
+ *
+ * @param character
+ *   The ASCII character to validate.
+ * @param always
+ *   Set to F_true to always return F_true for valid digits even if the valid digit would be invalid because it is out of the requested base range.
+ *   When F_false, this function returns F_true if the decimal digit is a valid decimal digit within the requested base range.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffffffff (F_type_size_max_32_unsigned_d) to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then the accepted range is that of hexadecimal rather than decimal (only the characters 0 to 9 are ever matched because the test is guarded by isdigit()).
+ *   If specified and is initially a value of 0xffffffff (F_type_size_max_32_unsigned_d), then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if an ASCII decimal character.
+ *   F_false if not an ASCII decimal character.
+ *
+ * @see isdigit()
+ *
+ * @see f_utf_character_is_decimal()
+ * @see f_utf_is_decimal()
+ */
+#if !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+  extern f_status_t private_f_utf_character_is_decimal_for_ascii(const f_char_t character, const bool always, uint32_t * const value) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_decimal_) || !defined(_di_f_utf_is_decimal_)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _PRIVATE_F_utf_decimal_h
diff --git a/level_0/f_utf/c/private-utf_digit.c b/level_0/f_utf/c/private-utf_digit.c

index 71b637ab21458b4e8b6e08a473c7c9904a8fa0ce..3e18ecee4813c36f996c51c63e5d1053f6bf6af5 100644 (file)
--- a/level_0/f_utf/c/private-utf_digit.c
+++ b/level_0/f_utf/c/private-utf_digit.c
@@ -6,7 +6,7 @@
  extern "C" {
  #endif
  
-#if !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
+#if !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
    f_status_t private_f_utf_character_is_digit(const f_utf_char_t sequence) {
  
      if (macro_f_utf_char_t_width_is(sequence) == 2) {
@@ -160,6 +160,13 @@ extern "C" {
            return F_true;
          }
        }
+      else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
+
+        // Number Forms: U+2160 to U+2188 (Roman Numerals).
+        if (sequence >= 0xe285a000 && sequence <= 0xe2868800) {
+          return F_true;
+        }
+      }
        else if (macro_f_utf_char_t_to_char_1(sequence) == 0xea) {
  
          // Vai: U+A620 to U+A629.
@@ -306,6 +313,11 @@ extern "C" {
            return F_true;
          }
  
+        // Tangsa: U+16AC0 to U+16AC9.
+        if (sequence >= 0xf096ab80 && sequence <= 0xf096ab89) {
+          return F_true;
+        }
+
          // Pahawh Hmong: U+16B50 to U+16B59.
          if (sequence >= 0xf096ad90 && sequence <= 0xf096ad99) {
            return F_true;
@@ -366,7 +378,7 @@ extern "C" {
  
      return F_false;
    }
-#endif // !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
+#endif // !defined(_di_f_utf_character_is_alphabetic_decimal_) || !defined(_di_f_utf_is_alphabetic_decimal_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_is_digit_)
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/level_0/f_utf/c/private-utf_word.c b/level_0/f_utf/c/private-utf_word.c

index 08008addc53171ba5a594606c74a129d2c0abe40..0c77bd01423f8b6c5c0c0224bd704b48467dce54 100644 (file)
--- a/level_0/f_utf/c/private-utf_word.c
+++ b/level_0/f_utf/c/private-utf_word.c
@@ -10,7 +10,7 @@ extern "C" {
  #if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
    f_status_t private_f_utf_character_is_word(const f_utf_char_t sequence, const bool strict) {
  
-    if (private_f_utf_character_is_alphabetic_digit(sequence)) {
+    if (private_f_utf_character_is_alphabetic_decimal(sequence, 0)) {
        return F_true;
      }
  
diff --git a/level_0/f_utf/c/utf/common.h b/level_0/f_utf/c/utf/common.h

index a7f868c21e494b9aaf9a4ec3c151e71e57c135a9..90f97dafaaab2dfa22e693cf8ed52dc2a59f3375 100644 (file)
--- a/level_0/f_utf/c/utf/common.h
+++ b/level_0/f_utf/c/utf/common.h
@@ -146,32 +146,32 @@ extern "C" {
  #endif // _di_f_utf_substitute_
  
  /**
- * Provide a basic UTF-8 character as a single 4-byte variable.
+ * Provide a basic UTF-8 byte sequence as a single 4-byte variable.
   *
- * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
+ * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte sequence.
   *
- * This "character" type is stored as a big-endian 4-byte integer (32-bits).
- * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that character arrays (uint8_t []) can be correctly processed.
+ * This byte sequence type is stored as a big-endian 4-byte integer (32-bits).
+ * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that byte sequence arrays (uint8_t []) can be correctly processed.
   *
   * The byte structure is intended to be read left to right in memory regardless of system endianness.
- * This is done so that the first character (the left most) can be read naturally as a string, such as string[0] = first character.
+ * This is done so that the first byte (the left most) can be read naturally as a string, such as string[0] = first byte.
   *
   * On little-endian systems, the hex-string 0xff is represented as internally as 0x000000ff.
   * This needs to be converted into the internal representation of 0xff000000 to be properly represented as a "f_utf_char_t".
   *
- * The macro_f_utf_char_t_mask_byte_* are used to get the entire character set fo a given width.
+ * The macro_f_utf_char_t_mask_byte_* are used to get the entire byte sequence for a given width.
   *
- * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single character range.
+ * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single byte sequence range.
   *
   * The macro_f_utf_char_t_to_char_* are used to convert a f_utf_char_t into a uint8_t, for a given 8-bit block.
   *
   * The macro_f_utf_char_t_from_char_* are used to convert a uint8_t into part of a f_utf_char_t, for a given 8-bit block.
   *
- * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width.
- * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 character based on macro_f_utf_byte_width_is.
+ * The macro_f_utf_char_t_width is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width.
+ * The macro_f_utf_char_t_width_is is used to determine the width of the UTF-8 byte sequence based on macro_f_utf_byte_width_is.
   *
- * The macro_f_utf_char_t_width macro determines a width of the UTF-8 character based on macro_f_utf_byte_width.
- * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when character is ASCII.
+ * The macro_f_utf_char_t_width macro determines a width of the UTF-8 byte sequence based on macro_f_utf_byte_width.
+ * The macro_f_utf_char_t_width_is is identical to macro_f_utf_char_t_width, except it returns 0 when byte sequence is ASCII.
   *
   * The macros that end in "_be" or "_le" represent "big endian" and "little endian".
   * The default macros without the "_be" should be in "big endian" because the strings are always stored as if they were "big endian" without regard to the host byte order.
@@ -196,15 +196,15 @@ extern "C" {
    #define F_utf_char_mask_char_3_be_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
    #define F_utf_char_mask_char_4_be_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
  
-  #define macro_f_utf_char_t_to_char_1_be(character) (((character) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte.
-  #define macro_f_utf_char_t_to_char_2_be(character) (((character) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte.
-  #define macro_f_utf_char_t_to_char_3_be(character) (((character) & F_utf_char_mask_char_3_be_d) >> 8)  // Grab third byte.
-  #define macro_f_utf_char_t_to_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d)         // Grab fourth byte.
+  #define macro_f_utf_char_t_to_char_1_be(sequence) (((sequence) & F_utf_char_mask_char_1_be_d) >> 24) // Grab first byte.
+  #define macro_f_utf_char_t_to_char_2_be(sequence) (((sequence) & F_utf_char_mask_char_2_be_d) >> 16) // Grab second byte.
+  #define macro_f_utf_char_t_to_char_3_be(sequence) (((sequence) & F_utf_char_mask_char_3_be_d) >> 8)  // Grab third byte.
+  #define macro_f_utf_char_t_to_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d)         // Grab fourth byte.
  
-  #define macro_f_utf_char_t_from_char_1_be(character) (((character) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte.
-  #define macro_f_utf_char_t_from_char_2_be(character) (((character) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte.
-  #define macro_f_utf_char_t_from_char_3_be(character) (((character) << 8) & F_utf_char_mask_char_3_be_d)  // Shift to third byte.
-  #define macro_f_utf_char_t_from_char_4_be(character) ((character) & F_utf_char_mask_char_4_be_d)         // Shift to fourth byte.
+  #define macro_f_utf_char_t_from_char_1_be(sequence) (((sequence) << 24) & F_utf_char_mask_char_1_be_d) // Shift to first byte.
+  #define macro_f_utf_char_t_from_char_2_be(sequence) (((sequence) << 16) & F_utf_char_mask_char_2_be_d) // Shift to second byte.
+  #define macro_f_utf_char_t_from_char_3_be(sequence) (((sequence) << 8) & F_utf_char_mask_char_3_be_d)  // Shift to third byte.
+  #define macro_f_utf_char_t_from_char_4_be(sequence) ((sequence) & F_utf_char_mask_char_4_be_d)         // Shift to fourth byte.
  
    // Little Endian.
    #define F_utf_char_mask_byte_1_le_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
@@ -217,15 +217,15 @@ extern "C" {
    #define F_utf_char_mask_char_3_le_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000
    #define F_utf_char_mask_char_4_le_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
  
-  #define macro_f_utf_char_t_to_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d)         // Grab first byte.
-  #define macro_f_utf_char_t_to_char_2_le(character) (((character) & F_utf_char_mask_char_2_le_d) >> 8)  // Grab second byte.
-  #define macro_f_utf_char_t_to_char_3_le(character) (((character) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte.
-  #define macro_f_utf_char_t_to_char_4_le(character) (((character) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte.
+  #define macro_f_utf_char_t_to_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d)         // Grab first byte.
+  #define macro_f_utf_char_t_to_char_2_le(sequence) (((sequence) & F_utf_char_mask_char_2_le_d) >> 8)  // Grab second byte.
+  #define macro_f_utf_char_t_to_char_3_le(sequence) (((sequence) & F_utf_char_mask_char_3_le_d) >> 16) // Grab third byte.
+  #define macro_f_utf_char_t_to_char_4_le(sequence) (((sequence) & F_utf_char_mask_char_4_le_d) >> 24) // Grab fourth byte.
  
-  #define macro_f_utf_char_t_from_char_1_le(character) ((character) & F_utf_char_mask_char_1_le_d)         // Shift to first byte.
-  #define macro_f_utf_char_t_from_char_2_le(character) (((character) << 8) & F_utf_char_mask_char_2_le_d)  // Shift to second byte.
-  #define macro_f_utf_char_t_from_char_3_le(character) (((character) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte.
-  #define macro_f_utf_char_t_from_char_4_le(character) (((character) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte.
+  #define macro_f_utf_char_t_from_char_1_le(sequence) ((sequence) & F_utf_char_mask_char_1_le_d)         // Shift to first byte.
+  #define macro_f_utf_char_t_from_char_2_le(sequence) (((sequence) << 8) & F_utf_char_mask_char_2_le_d)  // Shift to second byte.
+  #define macro_f_utf_char_t_from_char_3_le(sequence) (((sequence) << 16) & F_utf_char_mask_char_3_le_d) // Shift to third byte.
+  #define macro_f_utf_char_t_from_char_4_le(sequence) (((sequence) << 24) & F_utf_char_mask_char_4_le_d) // Shift to fourth byte.
  
    #define F_utf_char_mask_byte_1_d F_utf_char_mask_byte_1_be_d
    #define F_utf_char_mask_byte_2_d F_utf_char_mask_byte_2_be_d
@@ -237,18 +237,18 @@ extern "C" {
    #define F_utf_char_mask_char_3_d F_utf_char_mask_char_3_be_d
    #define F_utf_char_mask_char_4_d F_utf_char_mask_char_4_be_d
  
-  #define macro_f_utf_char_t_to_char_1(character) macro_f_utf_char_t_to_char_1_be(character)
-  #define macro_f_utf_char_t_to_char_2(character) macro_f_utf_char_t_to_char_2_be(character)
-  #define macro_f_utf_char_t_to_char_3(character) macro_f_utf_char_t_to_char_3_be(character)
-  #define macro_f_utf_char_t_to_char_4(character) macro_f_utf_char_t_to_char_4_be(character)
+  #define macro_f_utf_char_t_to_char_1(sequence) macro_f_utf_char_t_to_char_1_be(sequence)
+  #define macro_f_utf_char_t_to_char_2(sequence) macro_f_utf_char_t_to_char_2_be(sequence)
+  #define macro_f_utf_char_t_to_char_3(sequence) macro_f_utf_char_t_to_char_3_be(sequence)
+  #define macro_f_utf_char_t_to_char_4(sequence) macro_f_utf_char_t_to_char_4_be(sequence)
  
-  #define macro_f_utf_char_t_from_char_1(character) macro_f_utf_char_t_from_char_1_be(character)
-  #define macro_f_utf_char_t_from_char_2(character) macro_f_utf_char_t_from_char_2_be(character)
-  #define macro_f_utf_char_t_from_char_3(character) macro_f_utf_char_t_from_char_3_be(character)
-  #define macro_f_utf_char_t_from_char_4(character) macro_f_utf_char_t_from_char_4_be(character)
+  #define macro_f_utf_char_t_from_char_1(sequence) macro_f_utf_char_t_from_char_1_be(sequence)
+  #define macro_f_utf_char_t_from_char_2(sequence) macro_f_utf_char_t_from_char_2_be(sequence)
+  #define macro_f_utf_char_t_from_char_3(sequence) macro_f_utf_char_t_from_char_3_be(sequence)
+  #define macro_f_utf_char_t_from_char_4(sequence) macro_f_utf_char_t_from_char_4_be(sequence)
  
-  #define macro_f_utf_char_t_width(character)    (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(character)))
-  #define macro_f_utf_char_t_width_is(character) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(character)))
+  #define macro_f_utf_char_t_width(sequence)    (macro_f_utf_byte_width(macro_f_utf_char_t_to_char_1_be(sequence)))
+  #define macro_f_utf_char_t_width_is(sequence) (macro_f_utf_byte_width_is(macro_f_utf_char_t_to_char_1_be(sequence)))
  #endif // _di_f_utf_char_t_
  
  /**
@@ -300,16 +300,16 @@ extern "C" {
  #endif // _di_f_utf_string_t_
  
  /**
- * Define unicode special character widths.
+ * Define unicode special byte sequence widths.
   *
   * F_utf_width_*:
   *   - none:      Designate this is not a width value or has no width (aka: NULL).
   *   - ambiguous: Characters appear in East Asian DBCS and in SBCS.
- *   - full:      Wide character that has a equivilent to a narrow character.
- *   - half:      Narrow character that has a equivilent to a wide character.
- *   - narrow:    Narrow character, without a wide equivalent.
+ *   - full:      Wide byte sequence that has an equivalent to a narrow byte sequence.
+ *   - half:      Narrow byte sequence that has an equivalent to a wide byte sequence.
+ *   - narrow:    Narrow byte sequence, without a wide equivalent.
   *   - nuetral:   Characters that do not appear in East Asian DBCS codes.
- *   - wide:      Wide character, without a narrow equivalent.
+ *   - wide:      Wide byte sequence, without a narrow equivalent.
   */
  #ifndef _di_f_utf_widths_t_
    enum {
diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c

index 635119d05d29b038f770c7208732bd07894126f2..f65749a5aa3f81cc0a27687ec96190a08f9ead81 100644 (file)
--- a/level_0/f_utf/c/utf/is.c
+++ b/level_0/f_utf/c/utf/is.c
@@ -3,6 +3,7 @@
  #include "../private-utf_alphabetic.h"
  #include "../private-utf_combining.h"
  #include "../private-utf_control.h"
+#include "../private-utf_decimal.h"
  #include "../private-utf_digit.h"
  #include "../private-utf_emoji.h"
  #include "../private-utf_numeric.h"
@@ -24,123 +25,121 @@ extern "C" {
  #endif
  
  #ifndef _di_f_utf_is_
-  f_status_t f_utf_is(const f_string_t character) {
+  f_status_t f_utf_is(const f_string_t sequence) {
  
-    return macro_f_utf_byte_width_is(*character);
+    return macro_f_utf_byte_width_is(*sequence);
    }
  #endif // _di_f_utf_is_
  
  #ifndef _di_f_utf_is_alphabetic_
-  f_status_t f_utf_is_alphabetic(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_alphabetic(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_alphabetic(character_utf);
+      return private_f_utf_character_is_alphabetic(utf);
      }
  
-    if (isalpha(*character)) {
-      return F_true;
-    }
+    if (isalpha(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_alphabetic_
  
-#ifndef _di_f_utf_is_alphabetic_digit_
-  f_status_t f_utf_is_alphabetic_digit(const f_string_t character, const f_array_length_t width_max) {
+#ifndef _di_f_utf_is_alphabetic_decimal_
+  f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_alphabetic_digit(character_utf);
+      return private_f_utf_character_is_alphabetic_decimal(utf, value);
      }
  
-    if (isalnum(*character)) {
+    if (isalpha(*sequence)) return F_true;
+
+    if (private_f_utf_character_is_decimal_for_ascii(*sequence, F_true, value) == F_true) {
        return F_true;
      }
  
      return F_false;
    }
-#endif // _di_f_utf_is_alphabetic_digit_
+#endif // _di_f_utf_is_alphabetic_decimal_
  
  #ifndef _di_f_utf_is_alphabetic_numeric_
-  f_status_t f_utf_is_alphabetic_numeric(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_alphabetic_numeric(character_utf);
+      return private_f_utf_character_is_alphabetic_numeric(utf);
      }
  
-    if (isalnum(*character)) {
-      return F_true;
-    }
+    if (isalnum(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_alphabetic_numeric_
  
  #ifndef _di_f_utf_is_ascii_
-  f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_ascii(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
@@ -152,28 +151,28 @@ extern "C" {
  #endif // _di_f_utf_is_ascii_
  
  #ifndef _di_f_utf_is_combining_
-  f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_combining(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_combining(character_utf);
+      return private_f_utf_character_is_combining(utf);
      }
  
      // There are no ASCII combining characters.
@@ -182,94 +181,90 @@ extern "C" {
  #endif // _di_f_utf_is_combining_
  
  #ifndef _di_f_utf_is_control_
-  f_status_t f_utf_is_control(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_control(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_control(character_utf);
+      return private_f_utf_character_is_control(utf);
      }
  
-    if (iscntrl(*character)) {
-      return F_true;
-    }
+    if (iscntrl(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_control_
  
  #ifndef _di_f_utf_is_control_code
-  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_control_code(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_control_code(character_utf);
+      return private_f_utf_character_is_control_code(utf);
      }
  
-    if (iscntrl(*character)) {
-      return F_true;
-    }
+    if (iscntrl(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_control_code_
  
  #ifndef _di_f_utf_is_control_format_
-  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_control_format(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_control_format(character_utf);
+      return private_f_utf_character_is_control_format(utf);
      }
  
      // There are no ASCII control formats.
@@ -278,32 +273,32 @@ extern "C" {
  #endif // _di_f_utf_is_control_format_
  
  #ifndef _di_f_utf_is_control_picture_
-  f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_control_picture(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      if (macro_f_utf_byte_width_is(*character) != 3) {
+      if (macro_f_utf_byte_width_is(*sequence) != 3) {
          return F_false;
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_control_picture(character_utf);
+      return private_f_utf_character_is_control_picture(utf);
      }
  
      // There are no ASCII control pictures.
@@ -311,62 +306,89 @@ extern "C" {
    }
  #endif // _di_f_utf_is_control_picture_
  
-#ifndef _di_f_utf_is_digit_
-  f_status_t f_utf_is_digit(const f_string_t character, const f_array_length_t width_max) {
+#ifndef _di_f_utf_is_decimal_
+  f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_digit(character_utf);
+      return private_f_utf_character_is_decimal(utf, F_true, value);
      }
  
-    if (isdigit(*character)) {
-      return F_true;
+    return private_f_utf_character_is_decimal_for_ascii(*sequence, F_false, value);
+  }
+#endif // _di_f_utf_is_decimal_
+
+#ifndef _di_f_utf_is_digit_
+  f_status_t f_utf_is_digit(const f_string_t sequence, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
+        return F_status_set_error(F_complete_not_utf);
+      }
+
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      f_utf_char_t utf = 0;
+
+      {
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
+        if (F_status_is_error(status)) return status;
+      }
+
+      return private_f_utf_character_is_digit(utf);
      }
  
+    if (isdigit(*sequence)) return F_true;
+
      return F_false;
    }
  #endif // _di_f_utf_is_digit_
  
  #ifndef _di_f_utf_is_emoji_
-  f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_emoji(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_emoji(character_utf);
+      return private_f_utf_character_is_emoji(utf);
      }
  
      return F_false;
@@ -374,9 +396,9 @@ extern "C" {
  #endif // _di_f_utf_is_emoji_
  
  #ifndef _di_f_utf_is_fragment_
-  f_status_t f_utf_is_fragment(const f_string_t character) {
+  f_status_t f_utf_is_fragment(const f_string_t sequence) {
  
-    if (macro_f_utf_byte_width_is(*character) == 1) {
+    if (macro_f_utf_byte_width_is(*sequence) == 1) {
        return F_true;
      }
  
@@ -385,107 +407,103 @@ extern "C" {
  #endif // _di_f_utf_is_fragment_
  
  #ifndef _di_f_utf_is_graph_
-  f_status_t f_utf_is_graph(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_graph(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      if (private_f_utf_character_is_control(character_utf)) {
+      if (private_f_utf_character_is_control(utf)) {
          return F_false;
        }
  
-      if (private_f_utf_character_is_whitespace(character_utf)) {
+      if (private_f_utf_character_is_whitespace(utf)) {
          return F_false;
        }
  
        // Zero-width characters are be treated as a non-graph.
-      if (private_f_utf_character_is_zero_width(character_utf)) {
+      if (private_f_utf_character_is_zero_width(utf)) {
          return F_false;
        }
  
        return F_true;
      }
  
-    if (isgraph(*character)) {
-      return F_true;
-    }
+    if (isgraph(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_graph_
  
  #ifndef _di_f_utf_is_numeric_
-  f_status_t f_utf_is_numeric(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_numeric(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_numeric(character_utf);
+      return private_f_utf_character_is_numeric(utf);
      }
  
-    if (isdigit(*character)) {
-      return F_true;
-    }
+    if (isdigit(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_numeric_
  
  #ifndef _di_f_utf_is_phonetic_
-  f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_phonetic(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_phonetic(character_utf);
+      return private_f_utf_character_is_phonetic(utf);
      }
  
      // There are no ASCII phonetic characters.
@@ -494,28 +512,28 @@ extern "C" {
  #endif // _di_f_utf_is_phonetic_
  
  #ifndef _di_f_utf_is_private_
-  f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_private(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_private(character_utf);
+      return private_f_utf_character_is_private(utf);
      }
  
      // There are no ASCII private characters.
@@ -524,57 +542,57 @@ extern "C" {
  #endif // _di_f_utf_is_private_
  
  #ifndef _di_f_utf_is_punctuation_
-  f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_punctuation(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_punctuation(character_utf);
+      return private_f_utf_character_is_punctuation(utf);
      }
  
      // ASCII: U+0021 '!' to U+0023 '#'.
-    if (character[0] > 0x20 && character[0] < 0x24) {
+    if (sequence[0] > 0x20 && sequence[0] < 0x24) {
        return F_true;
      }
  
      // ASCII: U+0025 '%' to U+002A '*'.
-    if (character[0] > 0x24 && character[0] < 0x2b) {
+    if (sequence[0] > 0x24 && sequence[0] < 0x2b) {
        return F_true;
      }
  
      // ASCII: U+002C ',' to U+002F '/'.
-    if (character[0] > 0x2b && character[0] < 0x30) {
+    if (sequence[0] > 0x2b && sequence[0] < 0x30) {
        return F_true;
      }
  
      // ASCII: U+003A ':', U+003B ';', U+003F '?', or U+0040 '@'.
-    if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
+    if (sequence[0] == 0x3a || sequence[0] == 0x3b || sequence[0] == 0x3f || sequence[0] == 0x40) {
        return F_true;
      }
  
      // ASCII: U+005B '[' to U+005D ']'.
-    if (character[0] > 0x5a && character[0] < 0x5e) {
+    if (sequence[0] > 0x5a && sequence[0] < 0x5e) {
        return F_true;
      }
  
      // ASCII: U+005F '_', U+007B '{', or U+007D '}'.
-    if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
+    if (sequence[0] == 0x5f || sequence[0] == 0x7b || sequence[0] == 0x7d) {
        return F_true;
      }
  
@@ -583,28 +601,28 @@ extern "C" {
  #endif // _di_f_utf_is_punctuation_
  
  #ifndef _di_f_utf_is_subscript_
-  f_status_t f_utf_is_subscript(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_subscript(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_subscript(character_utf);
+      return private_f_utf_character_is_subscript(utf);
      }
  
      return F_false;
@@ -612,28 +630,28 @@ extern "C" {
  #endif // _di_f_utf_is_subscript_
  
  #ifndef _di_f_utf_is_superscript_
-  f_status_t f_utf_is_superscript(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_superscript(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_superscript(character_utf);
+      return private_f_utf_character_is_superscript(utf);
      }
  
      return F_false;
@@ -641,42 +659,42 @@ extern "C" {
  #endif // _di_f_utf_is_superscript_
  
  #ifndef _di_f_utf_is_symbol_
-  f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_symbol(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_symbol(character_utf);
+      return private_f_utf_character_is_symbol(utf);
      }
  
      // ASCII: U+0024 ('$') or U+002B ('+').
-    if (character[0] == 0x24 || character[0] == 0x2b) {
+    if (sequence[0] == 0x24 || sequence[0] == 0x2b) {
        return F_true;
      }
  
      // ASCII: U+003C ('<') to U+003E ('>').
-    if (character[0] >= 0x3c && character[0] <= 0x3e) {
+    if (sequence[0] >= 0x3c && sequence[0] <= 0x3e) {
        return F_true;
      }
  
      // ASCII: U+005E ('^'), U+0060 ('`'), U+007C ('|'), or U+007E ('~').
-    if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
+    if (sequence[0] == 0x5e || sequence[0] == 0x60 || sequence[0] == 0x7c || sequence[0] == 0x7e) {
        return F_true;
      }
  
@@ -685,28 +703,28 @@ extern "C" {
  #endif // _di_f_utf_is_symbol_
  
  #ifndef _di_f_utf_is_unassigned_
-  f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_unassigned(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_unassigned(character_utf);
+      return private_f_utf_character_is_unassigned(utf);
      }
  
      // ASCII are never unassigned.
@@ -715,28 +733,28 @@ extern "C" {
  #endif // _di_f_utf_is_unassigned_
  
  #ifndef _di_f_utf_is_valid_
-  f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_valid(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_valid(character_utf);
+      return private_f_utf_character_is_valid(utf);
      }
  
      // ASCII are valid.
@@ -745,61 +763,59 @@ extern "C" {
  #endif // _di_f_utf_is_valid_
  
  #ifndef _di_f_utf_is_whitespace_
-  f_status_t f_utf_is_whitespace(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_whitespace(character_utf);
+      return private_f_utf_character_is_whitespace(utf);
      }
  
-    if (isspace(*character)) {
-      return F_true;
-    }
+    if (isspace(*sequence)) return F_true;
  
      return F_false;
    }
  #endif // _di_f_utf_is_whitespace_
  
  #ifndef _di_f_utf_is_whitespace_modifier_
-  f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_whitespace_modifier(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_whitespace_modifier(character_utf);
+      return private_f_utf_character_is_whitespace_modifier(utf);
      }
  
      // There are no ASCII whitespace modifiers.
@@ -808,28 +824,28 @@ extern "C" {
  #endif // _di_f_utf_is_whitespace_modifier_
  
  #ifndef _di_f_utf_is_whitespace_other_
-  f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_whitespace_other(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_whitespace_other(character_utf);
+      return private_f_utf_character_is_whitespace_other(utf);
      }
  
      // There are no ASCII whitespace other.
@@ -838,25 +854,25 @@ extern "C" {
  #endif // _di_f_utf_is_whitespace_other_
  
  #ifndef _di_f_utf_is_wide_
-  f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_wide(const f_string_t sequence, const f_array_length_t width_max) {
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_wide(character_utf);
+      return private_f_utf_character_is_wide(utf);
      }
  
      // There are no wide ASCII characters.
@@ -865,31 +881,31 @@ extern "C" {
  #endif // _di_f_utf_is_wide_
  
  #ifndef _di_f_utf_is_word_
-  f_status_t f_utf_is_word(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+  f_status_t f_utf_is_word(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_word(character_utf, strict);
+      return private_f_utf_character_is_word(utf, strict);
      }
  
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0]) {
+    if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0]) {
        return F_true;
      }
  
@@ -898,31 +914,31 @@ extern "C" {
  #endif // _di_f_utf_is_word_
  
  #ifndef _di_f_utf_is_word_dash_
-  f_status_t f_utf_is_word_dash(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+  f_status_t f_utf_is_word_dash(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_word_dash(character_utf, strict);
+      return private_f_utf_character_is_word_dash(utf, strict);
      }
  
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0]) {
+    if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0]) {
        return F_true;
      }
  
@@ -931,31 +947,31 @@ extern "C" {
  #endif // _di_f_utf_is_word_dash_
  
  #ifndef _di_f_utf_is_word_dash_plus_
-  f_status_t f_utf_is_word_dash_plus(const f_string_t character, const f_array_length_t width_max, const bool strict) {
+  f_status_t f_utf_is_word_dash_plus(const f_string_t sequence, const f_array_length_t width_max, const bool strict) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_word_dash_plus(character_utf, strict);
+      return private_f_utf_character_is_word_dash_plus(utf, strict);
      }
  
-    if (isalnum(*character) || *character == f_string_ascii_underscore_s.string[0] || *character == f_string_ascii_minus_s.string[0] || *character == f_string_ascii_plus_s.string[0]) {
+    if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0] || *sequence == f_string_ascii_plus_s.string[0]) {
        return F_true;
      }
  
@@ -964,38 +980,38 @@ extern "C" {
  #endif // _di_f_utf_is_word_dash_plus_
  
  #ifndef _di_f_utf_is_zero_width_
-  f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max) {
+  f_status_t f_utf_is_zero_width(const f_string_t sequence, const f_array_length_t width_max) {
      #ifndef _di_level_0_parameter_checking_
        if (width_max < 1) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (macro_f_utf_byte_width_is(*character)) {
-      if (macro_f_utf_byte_width_is(*character) > width_max) {
+    if (macro_f_utf_byte_width_is(*sequence)) {
+      if (macro_f_utf_byte_width_is(*sequence) > width_max) {
          return F_status_set_error(F_complete_not_utf);
        }
  
-      if (macro_f_utf_byte_width_is(*character) == 1) {
+      if (macro_f_utf_byte_width_is(*sequence) == 1) {
          return F_status_set_error(F_utf_fragment);
        }
  
-      f_utf_char_t character_utf = 0;
+      f_utf_char_t utf = 0;
  
        {
-        const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+        const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
          if (F_status_is_error(status)) return status;
        }
  
-      return private_f_utf_character_is_zero_width(character_utf);
+      return private_f_utf_character_is_zero_width(utf);
      }
  
      // These control characters are considered zero-width spaces.
-    if (*character >= 0x00 && *character <= 0x08) {
+    if (*sequence >= 0x00 && *sequence <= 0x08) {
        return F_true;
      }
-    else if (*character >= 0x0c && *character <= 0x1f) {
+    else if (*sequence >= 0x0c && *sequence <= 0x1f) {
        return F_true;
      }
-    else if (*character == 0x7f) {
+    else if (*sequence == 0x7f) {
        return F_true;
      }
  
diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h

index c75f85b0f5bf5b37c7daeff2c2d297e84c0cbff0..ca6c9b97cc8d3b1aa297515fa4d8115f5856765c 100644 (file)
--- a/level_0/f_utf/c/utf/is.h
+++ b/level_0/f_utf/c/utf/is.h
@@ -60,16 +60,39 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or digit character.
   *
- * Digit characters are decimal digits and letter numbers.
+ * Decimal characters are decimal digits.
   *
   * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
   *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as roman numerals.
+ * Roman Numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman Numerals found in the range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit.
+ * The 'F' is a character and a base-16 digit.
+ * If this were to return F_false because 'F' is greater than the requested base (such as base-12), then there would be confusion on whether or not 'F' is alphabetic.
+ * If the determined digit is greater than the requested base, then 0xffff is assigned to value.
+ *
   * @param sequence
   *   The byte sequence to validate as a character.
   *   There must be enough space allocated to compare against, as limited by width_max.
   * @param width_max
   *   The maximum width available for checking.
   *   Can be anything greater than 0.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffff to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ *   Set to NULL to not use.
   *
   * @return
   *   F_true if a UTF-8 alphabet character.
@@ -81,12 +104,35 @@ extern "C" {
   *
   * @see isalnum()
   */
+#ifndef _di_f_utf_is_alphabetic_decimal_
+  extern f_status_t f_utf_is_alphabetic_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value);
+#endif // _di_f_utf_is_alphabetic_decimal_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ *
+ * @return
+ *   F_true if a UTF-8 alphabetic-digit character.
+ *   F_false if not a UTF-8 alphabetic-digit character.
+ *
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if Unicode is an invalid Unicode character.
+ *
+ * @see isalnum()
+ */
  #ifndef _di_f_utf_is_alphabetic_digit_
    extern f_status_t f_utf_is_alphabetic_digit(const f_string_t sequence, const f_array_length_t width_max);
  #endif // _di_f_utf_is_alphabetic_digit_
  
  /**
- * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
   *
   * Numeric characters are decimal digits, letter numbers, and number-like, such as 1/2 (½) or superscript 2 (²).
   *
@@ -105,7 +151,8 @@ extern "C" {
   *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *   F_utf_not (with error bit) if Unicode is an invalid Unicode character.
   *
- * @see isalnum()
+ * @see isalpha()
+ * @see isdigit()
   */
  #ifndef _di_f_utf_is_alphabetic_numeric_
    extern f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max);
@@ -255,8 +302,64 @@ extern "C" {
  #endif // _di_f_utf_is_control_picture_
  
  /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character.
+ *
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as roman numerals.
+ * Roman Numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman Numerals found in the range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function only returns F_true for valid decimal digits within the requested base.
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ * @param base
+ *   (optional) The base digit to specify (up to base 16).
+ *   Set to 0 to not use.
+ *   This is ignored when value is NULL.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffff to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if a UTF-8 decimal character.
+ *   F_false if not a UTF-8 decimal character.
+ *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if Unicode is an invalid Unicode character.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_is_decimal_
+  extern f_status_t f_utf_is_decimal(const f_string_t sequence, const f_array_length_t width_max, uint32_t * const value);
+#endif // _di_f_utf_is_decimal_
+
+/**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
   *
+ * Digit characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
   * @param sequence
   *   The byte sequence to validate as a character.
   *   There must be enough space allocated to compare against, as limited by width_max.
@@ -688,7 +791,7 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
   *
- * A word character is alpha-digit or an underscore '_'.
+ * A word character is alphabetic-decimal or an underscore '_'.
   *
   * @param sequence
   *   The byte sequence to validate as a character.
@@ -718,7 +821,7 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
   *
- * A word dash character is alpha-digit, an underscore '_' or a dash '-'.
+ * A word dash character is alphabetic-decimal, an underscore '_' or a dash '-'.
   *
   * Unicode appears to refer to dashes that connect words as a hyphen.
   * Therefore, only these hyphens are considered dashes for the purposes of this function.
@@ -753,7 +856,7 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
   *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'.
   *
   * Unicode appears to refer to dashes that connect words as a hyphen.
   * Therefore, only these hyphens are considered dashes for the purposes of this function.
diff --git a/level_0/f_utf/c/utf/is_character.c b/level_0/f_utf/c/utf/is_character.c

index 53f819257974bde14628b31a064bf1e828e63fd0..381260b196aa37e71c27c41624c2f8d7a73d518b 100644 (file)
--- a/level_0/f_utf/c/utf/is_character.c
+++ b/level_0/f_utf/c/utf/is_character.c
@@ -3,6 +3,7 @@
  #include "../private-utf_alphabetic.h"
  #include "../private-utf_combining.h"
  #include "../private-utf_control.h"
+#include "../private-utf_decimal.h"
  #include "../private-utf_digit.h"
  #include "../private-utf_emoji.h"
  #include "../private-utf_numeric.h"
@@ -57,8 +58,29 @@ extern "C" {
    }
  #endif // _di_f_utf_character_is_alphabetic_
  
+#ifndef _di_f_utf_character_is_alphabetic_decimal_
+  f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+
+    if (macro_f_utf_char_t_width_is(sequence)) {
+      if (macro_f_utf_char_t_width_is(sequence) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_alphabetic_decimal(sequence, value);
+    }
+
+    if (isalpha(macro_f_utf_char_t_to_char_1(sequence))) return F_true;
+
+    if (private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_true, value) == F_true) {
+      return F_true;
+    }
+
+    return F_false;
+  }
+#endif // _di_f_utf_character_is_alphabetic_decimal_
+
  #ifndef _di_f_utf_character_is_alphabetic_digit_
-  f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence) {
+  f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence) {
  
      if (macro_f_utf_char_t_width_is(sequence)) {
        if (macro_f_utf_char_t_width_is(sequence) == 1) {
@@ -192,6 +214,21 @@ extern "C" {
    }
  #endif // _di_f_utf_character_is_control_picture_
  
+#ifndef _di_f_utf_character_is_decimal_
+  f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value) {
+
+    if (macro_f_utf_char_t_width_is(sequence)) {
+      if (macro_f_utf_char_t_width_is(sequence) == 1) {
+        return F_status_set_error(F_utf_fragment);
+      }
+
+      return private_f_utf_character_is_decimal(sequence, F_false, value);
+    }
+
+    return private_f_utf_character_is_decimal_for_ascii(macro_f_utf_char_t_to_char_1(sequence), F_false, value);
+  }
+#endif // _di_f_utf_character_is_decimal_
+
  #ifndef _di_f_utf_character_is_digit_
    f_status_t f_utf_character_is_digit(const f_utf_char_t sequence) {
  
diff --git a/level_0/f_utf/c/utf/is_character.h b/level_0/f_utf/c/utf/is_character.h

index 6c331ac59e763fc4f67ef76fc88e497b5391bf1b..9c850727732dc0c7f8a0a253fad4cb7110bc22d3 100644 (file)
--- a/level_0/f_utf/c/utf/is_character.h
+++ b/level_0/f_utf/c/utf/is_character.h
@@ -57,7 +57,54 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
   *
- * Digit characters are decimal digits and letter numbers.
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as roman numerals.
+ * Roman Numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function always returns F_true for valid decimal digits to avoid confusion between alphabetic and digits in regards to the base unit.
+ * The 'F' is a character and a base-16 digit.
+ * If this were to return F_false because it is greater than the requested base-12 then there would be confusion on whether or not 'F' is alphabetic.
+ * If the determined digit is greater than the requested base, then 0xffff is assigned to value.
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffff to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if a UTF-8 alphabetic-decimal character.
+ *   F_false if not a UTF-8 alphabetic-decimal character.
+ *
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see isalpha()
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_alphabetic_decimal_
+  extern f_status_t f_utf_character_is_alphabetic_decimal(const f_utf_char_t sequence, uint32_t * const value);
+#endif // _di_f_utf_character_is_alphabetic_decimal_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or digit character.
+ *
+ * Digit characters are decimal digits.
   *
   * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
   *
@@ -65,8 +112,8 @@ extern "C" {
   *   The byte sequence to validate as a character.
   *
   * @return
- *   F_true if a UTF-8 alpha-digit character.
- *   F_false if not a UTF-8 alpha-digit character.
+ *   F_true if a UTF-8 alphabetic-digit character.
+ *   F_false if not a UTF-8 alphabetic-digit character.
   *
   *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *   F_utf_not (with error bit) if unicode is an invalid Unicode character.
@@ -74,7 +121,7 @@ extern "C" {
   * @see isalnum()
   */
  #ifndef _di_f_utf_character_is_alphabetic_digit_
-  extern f_status_t f_utf_character_is_alpha_digit(const f_utf_char_t sequence);
+  extern f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence);
  #endif // _di_f_utf_character_is_alphabetic_digit_
  
  /**
@@ -215,9 +262,52 @@ extern "C" {
  #endif // _di_f_utf_character_is_control_picture_
  
  /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 decimal character.
+ *
+ * Decimal characters are decimal digits.
+ *
+ * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
+ *
+ * Decimal refers to a unit of base-10.
+ * To simplify the necessary code, this function automatically handles different base units if the number can be converted into an integer whose value is less than 2^16.
+ * If base-10 is desired, then simply ignore values greater than 9.
+ * For example, a base-16 character 'a' would result in the integer 10.
+ * Just ignore the value.
+ * This also processes large values such as roman numerals.
+ * Roman Numerals, however, conflict with the natural hexadecimal numbers.
+ * To avoid this, only Unicode Roman Numerals found in range U+2160 to U+2188 are treated as their respective numerals.
+ * For example, the Roman Numeral 'Ⅿ' (U+216F) represents 1000 rather than having 'M' (U+004D) representing 1000.
+ *
+ * This function only returns F_true for valid decimal digits within the requested base.
+ *
+ * @param sequence
+ *   The byte sequence to validate as a character.
+ * @param value
+ *   (optional) The integer representation of the character if the character is a decimal.
+ *   If specified, value is set to 0xffff to represent no known representation.
+ *   If specified and is initially a value of 0, then this represents operating normally as decimal (base-10).
+ *   If specified and is initially a value from 1 to 16, then this represents operating as that base unit.
+ *   For example, if value is 16, then this function will operate as "is hexadecimal" rather than "is decimal".
+ *   If specified and is initially a value of 0xffff, then this will grab all known integer digits.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_true if a UTF-8 decimal character.
+ *   F_false if not a UTF-8 decimal character.
+ *
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
+ *   F_utf_not (with error bit) if unicode is an invalid Unicode character.
+ *
+ * @see isdigit()
+ */
+#ifndef _di_f_utf_character_is_decimal_
+  extern f_status_t f_utf_character_is_decimal(const f_utf_char_t sequence, uint32_t * const value);
+#endif // _di_f_utf_character_is_decimal_
+
+/**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 digit character.
   *
- * Digit characters are decimal digits and letter numbers.
+ * Digit characters are decimal digits.
   *
   * This does not include number-like, such as 1/2 (½) or superscript 2 (²).
   *
@@ -638,7 +728,7 @@ extern "C" {
  /**
   * Check to see if the entire byte block of the character is an ASCII or UTF-8 word, dash, or plus character.
   *
- * A word dash plus character is alpha-digit, an underscore '_', a dash '-', or a plus '+'.
+ * A word dash plus character is alphabetic-decimal, an underscore '_', a dash '-', or a plus '+'.
   *
   * Unicode appears to refer to dashes that connect words as a hyphen.
   * Therefore, only these hyphens are considered dashes for the purposes of this function.
diff --git a/level_0/f_utf/data/build/settings b/level_0/f_utf/data/build/settings

index 716f9b4789128d9a62e8245ca3ab87f4f6b1a396..3d5bcfd6add1c863f237f9531fc54c312f907fb5 100644 (file)
--- a/level_0/f_utf/data/build/settings
+++ b/level_0/f_utf/data/build/settings
@@ -20,7 +20,7 @@ build_language c
  build_libraries -lc
  build_libraries-individual -lf_memory -lf_string
  
-build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
+build_sources_library utf.c private-utf.c private-utf_alphabetic.c private-utf_combining.c private-utf_control.c private-utf_decimal.c private-utf_digit.c private-utf_emoji.c private-utf_numeric.c private-utf_phonetic.c private-utf_private.c private-utf_punctuation.c private-utf_subscript.c private-utf_superscript.c private-utf_symbol.c private-utf_valid.c private-utf_whitespace.c private-utf_wide.c private-utf_word.c private-utf_zero_width.c
  build_sources_library utf/common.c utf/convert.c utf/dynamic.c utf/is.c utf/is_character.c utf/map.c utf/map_multi.c utf/static.c utf/string.c utf/triple.c
  build_sources_library utf/private-is_unassigned.c utf/private-dynamic.c utf/private-map.c utf/private-map_multi.c utf/private-string.c utf/private-triple.c
  
diff --git a/level_0/f_utf/data/build/settings-tests b/level_0/f_utf/data/build/settings-tests

index e1b55577c79d89f510129ba8d5010b226ccba5b0..c755f5dbf8f3df53df387e4305855070b990ad3b 100644 (file)
--- a/level_0/f_utf/data/build/settings-tests
+++ b/level_0/f_utf/data/build/settings-tests
@@ -29,6 +29,7 @@ build_sources_program test-utf-append.c test-utf-append_assure.c test-utf-append
  build_sources_program test-utf-character_is_alphabetic.c test-utf-is_alphabetic.c
  build_sources_program test-utf-character_is_combining.c test-utf-is_combining.c
  build_sources_program test-utf-character_is_control.c test-utf-is_control.c
+build_sources_program test-utf-character_is_decimal.c test-utf-is_decimal.c
  build_sources_program test-utf-character_is_digit.c test-utf-is_digit.c
  build_sources_program test-utf-character_is_emoji.c test-utf-is_emoji.c
  build_sources_program test-utf-character_is_numeric.c test-utf-is_numeric.c
diff --git a/level_0/f_utf/data/tests/bytesequences/decimal-all.txt b/level_0/f_utf/data/tests/bytesequences/decimal-all.txt

new file mode 100644 (file)

index 0000000..5c312bf
--- /dev/null
+++ b/level_0/f_utf/data/tests/bytesequences/decimal-all.txt
@@ -0,0 +1,701 @@
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+55712
+55713
+55714
+55715
+55716
+55717
+55718
+55719
+55720
+55721
+56240
+56241
+56242
+56243
+56244
+56245
+56246
+56247
+56248
+56249
+57216
+57217
+57218
+57219
+57220
+57221
+57222
+57223
+57224
+57225
+14722470
+14722471
+14722472
+14722473
+14722474
+14722475
+14722476
+14722477
+14722478
+14722479
+14722982
+14722983
+14722984
+14722985
+14722986
+14722987
+14722988
+14722989
+14722990
+14722991
+14723494
+14723495
+14723496
+14723497
+14723498
+14723499
+14723500
+14723501
+14723502
+14723503
+14724006
+14724007
+14724008
+14724009
+14724010
+14724011
+14724012
+14724013
+14724014
+14724015
+14724518
+14724519
+14724520
+14724521
+14724522
+14724523
+14724524
+14724525
+14724526
+14724527
+14725030
+14725031
+14725032
+14725033
+14725034
+14725035
+14725036
+14725037
+14725038
+14725039
+14725542
+14725543
+14725544
+14725545
+14725546
+14725547
+14725548
+14725549
+14725550
+14725551
+14726054
+14726055
+14726056
+14726057
+14726058
+14726059
+14726060
+14726061
+14726062
+14726063
+14726566
+14726567
+14726568
+14726569
+14726570
+14726571
+14726572
+14726573
+14726574
+14726575
+14727078
+14727079
+14727080
+14727081
+14727082
+14727083
+14727084
+14727085
+14727086
+14727087
+14727568
+14727569
+14727570
+14727571
+14727572
+14727573
+14727574
+14727575
+14727576
+14727577
+14728080
+14728081
+14728082
+14728083
+14728084
+14728085
+14728086
+14728087
+14728088
+14728089
+14728352
+14728353
+14728354
+14728355
+14728356
+14728357
+14728358
+14728359
+14728360
+14728361
+14778752
+14778753
+14778754
+14778755
+14778756
+14778757
+14778758
+14778759
+14778760
+14778761
+14779024
+14779025
+14779026
+14779027
+14779028
+14779029
+14779030
+14779031
+14779032
+14779033
+14786464
+14786465
+14786466
+14786467
+14786468
+14786469
+14786470
+14786471
+14786472
+14786473
+14786704
+14786705
+14786706
+14786707
+14786708
+14786709
+14786710
+14786711
+14786712
+14786713
+14787974
+14787975
+14787976
+14787977
+14787978
+14787979
+14787980
+14787981
+14787982
+14787983
+14788496
+14788497
+14788498
+14788499
+14788500
+14788501
+14788502
+14788503
+14788504
+14788505
+14789248
+14789249
+14789250
+14789251
+14789252
+14789253
+14789254
+14789255
+14789256
+14789257
+14789264
+14789265
+14789266
+14789267
+14789268
+14789269
+14789270
+14789271
+14789272
+14789273
+14790032
+14790033
+14790034
+14790035
+14790036
+14790037
+14790038
+14790039
+14790040
+14790041
+14790320
+14790321
+14790322
+14790323
+14790324
+14790325
+14790326
+14790327
+14790328
+14790329
+14791040
+14791041
+14791042
+14791043
+14791044
+14791045
+14791046
+14791047
+14791048
+14791049
+14791056
+14791057
+14791058
+14791059
+14791060
+14791061
+14791062
+14791063
+14791064
+14791065
+14845344
+14845345
+14845346
+14845347
+14845348
+14845349
+14845350
+14845351
+14845352
+14845353
+14845354
+14845355
+14845356
+14845357
+14845358
+14845359
+14845360
+14845361
+14845362
+14845363
+14845364
+14845365
+14845366
+14845367
+14845368
+14845369
+14845370
+14845371
+14845372
+14845373
+14845374
+14845375
+14845568
+14845569
+14845570
+14845571
+14845572
+14845573
+14845574
+14845575
+14845576
+15374496
+15374497
+15374498
+15374499
+15374500
+15374501
+15374502
+15374503
+15374504
+15374505
+15377296
+15377297
+15377298
+15377299
+15377300
+15377301
+15377302
+15377303
+15377304
+15377305
+15377536
+15377537
+15377538
+15377539
+15377540
+15377541
+15377542
+15377543
+15377544
+15377545
+15378320
+15378321
+15378322
+15378323
+15378324
+15378325
+15378326
+15378327
+15378328
+15378329
+15378352
+15378353
+15378354
+15378355
+15378356
+15378357
+15378358
+15378359
+15378360
+15378361
+15378832
+15378833
+15378834
+15378835
+15378836
+15378837
+15378838
+15378839
+15378840
+15378841
+15380400
+15380401
+15380402
+15380403
+15380404
+15380405
+15380406
+15380407
+15380408
+15380409
+15711376
+15711377
+15711378
+15711379
+15711380
+15711381
+15711382
+15711383
+15711384
+15711385
+4036006560
+4036006561
+4036006562
+4036006563
+4036006564
+4036006565
+4036006566
+4036006567
+4036006568
+4036006569
+4036015280
+4036015281
+4036015282
+4036015283
+4036015284
+4036015285
+4036015286
+4036015287
+4036015288
+4036015289
+4036067750
+4036067751
+4036067752
+4036067753
+4036067754
+4036067755
+4036067756
+4036067757
+4036067758
+4036067759
+4036068272
+4036068273
+4036068274
+4036068275
+4036068276
+4036068277
+4036068278
+4036068279
+4036068280
+4036068281
+4036068534
+4036068535
+4036068536
+4036068537
+4036068538
+4036068539
+4036068540
+4036068541
+4036068542
+4036068543
+4036069264
+4036069265
+4036069266
+4036069267
+4036069268
+4036069269
+4036069270
+4036069271
+4036069272
+4036069273
+4036070320
+4036070321
+4036070322
+4036070323
+4036070324
+4036070325
+4036070326
+4036070327
+4036070328
+4036070329
+4036071824
+4036071825
+4036071826
+4036071827
+4036071828
+4036071829
+4036071830
+4036071831
+4036071832
+4036071833
+4036072336
+4036072337
+4036072338
+4036072339
+4036072340
+4036072341
+4036072342
+4036072343
+4036072344
+4036072345
+4036073872
+4036073873
+4036073874
+4036073875
+4036073876
+4036073877
+4036073878
+4036073879
+4036073880
+4036073881
+4036074368
+4036074369
+4036074370
+4036074371
+4036074372
+4036074373
+4036074374
+4036074375
+4036074376
+4036074377
+4036074672
+4036074673
+4036074674
+4036074675
+4036074676
+4036074677
+4036074678
+4036074679
+4036074680
+4036074681
+4036076448
+4036076449
+4036076450
+4036076451
+4036076452
+4036076453
+4036076454
+4036076455
+4036076456
+4036076457
+4036076944
+4036076945
+4036076946
+4036076947
+4036076948
+4036076949
+4036076950
+4036076951
+4036076952
+4036076953
+4036080016
+4036080017
+4036080018
+4036080019
+4036080020
+4036080021
+4036080022
+4036080023
+4036080024
+4036080025
+4036081040
+4036081041
+4036081042
+4036081043
+4036081044
+4036081045
+4036081046
+4036081047
+4036081048
+4036081049
+4036081312
+4036081313
+4036081314
+4036081315
+4036081316
+4036081317
+4036081318
+4036081319
+4036081320
+4036081321
+4036405664
+4036405665
+4036405666
+4036405667
+4036405668
+4036405669
+4036405670
+4036405671
+4036405672
+4036405673
+4036406144
+4036406145
+4036406146
+4036406147
+4036406148
+4036406149
+4036406150
+4036406151
+4036406152
+4036406153
+4036406672
+4036406673
+4036406674
+4036406675
+4036406676
+4036406677
+4036406678
+4036406679
+4036406680
+4036406681
+4036861838
+4036861839
+4036861840
+4036861841
+4036861842
+4036861843
+4036861844
+4036861845
+4036861846
+4036861847
+4036861848
+4036861849
+4036861850
+4036861851
+4036861852
+4036861853
+4036861854
+4036861855
+4036861856
+4036861857
+4036861858
+4036861859
+4036861860
+4036861861
+4036861862
+4036861863
+4036861864
+4036861865
+4036861866
+4036861867
+4036861868
+4036861869
+4036861870
+4036861871
+4036861872
+4036861873
+4036861874
+4036861875
+4036861876
+4036861877
+4036861878
+4036861879
+4036861880
+4036861881
+4036861882
+4036861883
+4036861884
+4036861885
+4036861886
+4036861887
+4036920704
+4036920705
+4036920706
+4036920707
+4036920708
+4036920709
+4036920710
+4036920711
+4036920712
+4036920713
+4036922288
+4036922289
+4036922290
+4036922291
+4036922292
+4036922293
+4036922294
+4036922295
+4036922296
+4036922297
+4036928912
+4036928913
+4036928914
+4036928915
+4036928916
+4036928917
+4036928918
+4036928919
+4036928920
+4036928921
+4036997040
+4036997041
+4036997042
+4036997043
+4036997044
+4036997045
+4036997046
+4036997047
+4036997048
+4036997049
diff --git a/level_0/f_utf/data/tests/bytesequences/digit-all.txt b/level_0/f_utf/data/tests/bytesequences/digit-all.txt

index 9bcbc2fb086c0718eef4b8b94e0cec01bbe70bbb..5c312bf92442f19a01218e752bd490ce9ef80ced 100644 (file)
--- a/level_0/f_utf/data/tests/bytesequences/digit-all.txt
+++ b/level_0/f_utf/data/tests/bytesequences/digit-all.txt
@@ -178,16 +178,6 @@
  14778759
  14778760
  14778761
-4036006560
-4036006561
-4036006562
-4036006563
-4036006564
-4036006565
-4036006566
-4036006567
-4036006568
-4036006569
  14779024
  14779025
  14779026
@@ -198,6 +188,237 @@
  14779031
  14779032
  14779033
+14786464
+14786465
+14786466
+14786467
+14786468
+14786469
+14786470
+14786471
+14786472
+14786473
+14786704
+14786705
+14786706
+14786707
+14786708
+14786709
+14786710
+14786711
+14786712
+14786713
+14787974
+14787975
+14787976
+14787977
+14787978
+14787979
+14787980
+14787981
+14787982
+14787983
+14788496
+14788497
+14788498
+14788499
+14788500
+14788501
+14788502
+14788503
+14788504
+14788505
+14789248
+14789249
+14789250
+14789251
+14789252
+14789253
+14789254
+14789255
+14789256
+14789257
+14789264
+14789265
+14789266
+14789267
+14789268
+14789269
+14789270
+14789271
+14789272
+14789273
+14790032
+14790033
+14790034
+14790035
+14790036
+14790037
+14790038
+14790039
+14790040
+14790041
+14790320
+14790321
+14790322
+14790323
+14790324
+14790325
+14790326
+14790327
+14790328
+14790329
+14791040
+14791041
+14791042
+14791043
+14791044
+14791045
+14791046
+14791047
+14791048
+14791049
+14791056
+14791057
+14791058
+14791059
+14791060
+14791061
+14791062
+14791063
+14791064
+14791065
+14845344
+14845345
+14845346
+14845347
+14845348
+14845349
+14845350
+14845351
+14845352
+14845353
+14845354
+14845355
+14845356
+14845357
+14845358
+14845359
+14845360
+14845361
+14845362
+14845363
+14845364
+14845365
+14845366
+14845367
+14845368
+14845369
+14845370
+14845371
+14845372
+14845373
+14845374
+14845375
+14845568
+14845569
+14845570
+14845571
+14845572
+14845573
+14845574
+14845575
+14845576
+15374496
+15374497
+15374498
+15374499
+15374500
+15374501
+15374502
+15374503
+15374504
+15374505
+15377296
+15377297
+15377298
+15377299
+15377300
+15377301
+15377302
+15377303
+15377304
+15377305
+15377536
+15377537
+15377538
+15377539
+15377540
+15377541
+15377542
+15377543
+15377544
+15377545
+15378320
+15378321
+15378322
+15378323
+15378324
+15378325
+15378326
+15378327
+15378328
+15378329
+15378352
+15378353
+15378354
+15378355
+15378356
+15378357
+15378358
+15378359
+15378360
+15378361
+15378832
+15378833
+15378834
+15378835
+15378836
+15378837
+15378838
+15378839
+15378840
+15378841
+15380400
+15380401
+15380402
+15380403
+15380404
+15380405
+15380406
+15380407
+15380408
+15380409
+15711376
+15711377
+15711378
+15711379
+15711380
+15711381
+15711382
+15711383
+15711384
+15711385
+4036006560
+4036006561
+4036006562
+4036006563
+4036006564
+4036006565
+4036006566
+4036006567
+4036006568
+4036006569
  4036015280
  4036015281
  4036015282
@@ -368,6 +589,16 @@
  4036405671
  4036405672
  4036405673
+4036406144
+4036406145
+4036406146
+4036406147
+4036406148
+4036406149
+4036406150
+4036406151
+4036406152
+4036406153
  4036406672
  4036406673
  4036406674
@@ -378,106 +609,6 @@
  4036406679
  4036406680
  4036406681
-14786464
-14786465
-14786466
-14786467
-14786468
-14786469
-14786470
-14786471
-14786472
-14786473
-14786704
-14786705
-14786706
-14786707
-14786708
-14786709
-14786710
-14786711
-14786712
-14786713
-14787974
-14787975
-14787976
-14787977
-14787978
-14787979
-14787980
-14787981
-14787982
-14787983
-14788496
-14788497
-14788498
-14788499
-14788500
-14788501
-14788502
-14788503
-14788504
-14788505
-14789248
-14789249
-14789250
-14789251
-14789252
-14789253
-14789254
-14789255
-14789256
-14789257
-14789264
-14789265
-14789266
-14789267
-14789268
-14789269
-14789270
-14789271
-14789272
-14789273
-14790032
-14790033
-14790034
-14790035
-14790036
-14790037
-14790038
-14790039
-14790040
-14790041
-14790320
-14790321
-14790322
-14790323
-14790324
-14790325
-14790326
-14790327
-14790328
-14790329
-14791040
-14791041
-14791042
-14791043
-14791044
-14791045
-14791046
-14791047
-14791048
-14791049
-14791056
-14791057
-14791058
-14791059
-14791060
-14791061
-14791062
-14791063
-14791064
-14791065
  4036861838
  4036861839
  4036861840
@@ -568,83 +699,3 @@
  4036997047
  4036997048
  4036997049
-15374496
-15374497
-15374498
-15374499
-15374500
-15374501
-15374502
-15374503
-15374504
-15374505
-15377296
-15377297
-15377298
-15377299
-15377300
-15377301
-15377302
-15377303
-15377304
-15377305
-15377536
-15377537
-15377538
-15377539
-15377540
-15377541
-15377542
-15377543
-15377544
-15377545
-15378320
-15378321
-15378322
-15378323
-15378324
-15378325
-15378326
-15378327
-15378328
-15378329
-15378352
-15378353
-15378354
-15378355
-15378356
-15378357
-15378358
-15378359
-15378360
-15378361
-15378832
-15378833
-15378834
-15378835
-15378836
-15378837
-15378838
-15378839
-15378840
-15378841
-15380400
-15380401
-15380402
-15380403
-15380404
-15380405
-15380406
-15380407
-15380408
-15380409
-15711376
-15711377
-15711378
-15711379
-15711380
-15711381
-15711382
-15711383
-15711384
-15711385
diff --git a/level_0/f_utf/data/tests/codepoints/decimal-all.txt b/level_0/f_utf/data/tests/codepoints/decimal-all.txt

new file mode 100644 (file)

index 0000000..5923866
--- /dev/null
+++ b/level_0/f_utf/data/tests/codepoints/decimal-all.txt
@@ -0,0 +1,701 @@
+U+0030 
+U+0031 
+U+0032 
+U+0033 
+U+0034 
+U+0035 
+U+0036 
+U+0037 
+U+0038 
+U+0039 
+U+0660 
+U+0661 
+U+0662 
+U+0663 
+U+0664 
+U+0665 
+U+0666 
+U+0667 
+U+0668 
+U+0669 
+U+06F0 
+U+06F1 
+U+06F2 
+U+06F3 
+U+06F4 
+U+06F5 
+U+06F6 
+U+06F7 
+U+06F8 
+U+06F9 
+U+07C0 
+U+07C1 
+U+07C2 
+U+07C3 
+U+07C4 
+U+07C5 
+U+07C6 
+U+07C7 
+U+07C8 
+U+07C9 
+U+0966 
+U+0967 
+U+0968 
+U+0969 
+U+096A 
+U+096B 
+U+096C 
+U+096D 
+U+096E 
+U+096F 
+U+09E6 
+U+09E7 
+U+09E8 
+U+09E9 
+U+09EA 
+U+09EB 
+U+09EC 
+U+09ED 
+U+09EE 
+U+09EF 
+U+0A66 
+U+0A67 
+U+0A68 
+U+0A69 
+U+0A6A 
+U+0A6B 
+U+0A6C 
+U+0A6D 
+U+0A6E 
+U+0A6F 
+U+0AE6 
+U+0AE7 
+U+0AE8 
+U+0AE9 
+U+0AEA 
+U+0AEB 
+U+0AEC 
+U+0AED 
+U+0AEE 
+U+0AEF 
+U+0B66 
+U+0B67 
+U+0B68 
+U+0B69 
+U+0B6A 
+U+0B6B 
+U+0B6C 
+U+0B6D 
+U+0B6E 
+U+0B6F 
+U+0BE6 
+U+0BE7 
+U+0BE8 
+U+0BE9 
+U+0BEA 
+U+0BEB 
+U+0BEC 
+U+0BED 
+U+0BEE 
+U+0BEF 
+U+0C66 
+U+0C67 
+U+0C68 
+U+0C69 
+U+0C6A 
+U+0C6B 
+U+0C6C 
+U+0C6D 
+U+0C6E 
+U+0C6F 
+U+0CE6 
+U+0CE7 
+U+0CE8 
+U+0CE9 
+U+0CEA 
+U+0CEB 
+U+0CEC 
+U+0CED 
+U+0CEE 
+U+0CEF 
+U+0D66 
+U+0D67 
+U+0D68 
+U+0D69 
+U+0D6A 
+U+0D6B 
+U+0D6C 
+U+0D6D 
+U+0D6E 
+U+0D6F 
+U+0DE6 
+U+0DE7 
+U+0DE8 
+U+0DE9 
+U+0DEA 
+U+0DEB 
+U+0DEC 
+U+0DED 
+U+0DEE 
+U+0DEF 
+U+0E50 
+U+0E51 
+U+0E52 
+U+0E53 
+U+0E54 
+U+0E55 
+U+0E56 
+U+0E57 
+U+0E58 
+U+0E59 
+U+0ED0 
+U+0ED1 
+U+0ED2 
+U+0ED3 
+U+0ED4 
+U+0ED5 
+U+0ED6 
+U+0ED7 
+U+0ED8 
+U+0ED9 
+U+0F20 
+U+0F21 
+U+0F22 
+U+0F23 
+U+0F24 
+U+0F25 
+U+0F26 
+U+0F27 
+U+0F28 
+U+0F29 
+U+1040 
+U+1041 
+U+1042 
+U+1043 
+U+1044 
+U+1045 
+U+1046 
+U+1047 
+U+1048 
+U+1049 
+U+1090 
+U+1091 
+U+1092 
+U+1093 
+U+1094 
+U+1095 
+U+1096 
+U+1097 
+U+1098 
+U+1099 
+U+17E0 
+U+17E1 
+U+17E2 
+U+17E3 
+U+17E4 
+U+17E5 
+U+17E6 
+U+17E7 
+U+17E8 
+U+17E9 
+U+1810 
+U+1811 
+U+1812 
+U+1813 
+U+1814 
+U+1815 
+U+1816 
+U+1817 
+U+1818 
+U+1819 
+U+1946 
+U+1947 
+U+1948 
+U+1949 
+U+194A 
+U+194B 
+U+194C 
+U+194D 
+U+194E 
+U+194F 
+U+19D0 
+U+19D1 
+U+19D2 
+U+19D3 
+U+19D4 
+U+19D5 
+U+19D6 
+U+19D7 
+U+19D8 
+U+19D9 
+U+1A80 
+U+1A81 
+U+1A82 
+U+1A83 
+U+1A84 
+U+1A85 
+U+1A86 
+U+1A87 
+U+1A88 
+U+1A89 
+U+1A90 
+U+1A91 
+U+1A92 
+U+1A93 
+U+1A94 
+U+1A95 
+U+1A96 
+U+1A97 
+U+1A98 
+U+1A99 
+U+1B50 
+U+1B51 
+U+1B52 
+U+1B53 
+U+1B54 
+U+1B55 
+U+1B56 
+U+1B57 
+U+1B58 
+U+1B59 
+U+1BB0 
+U+1BB1 
+U+1BB2 
+U+1BB3 
+U+1BB4 
+U+1BB5 
+U+1BB6 
+U+1BB7 
+U+1BB8 
+U+1BB9 
+U+1C40 
+U+1C41 
+U+1C42 
+U+1C43 
+U+1C44 
+U+1C45 
+U+1C46 
+U+1C47 
+U+1C48 
+U+1C49 
+U+1C50 
+U+1C51 
+U+1C52 
+U+1C53 
+U+1C54 
+U+1C55 
+U+1C56 
+U+1C57 
+U+1C58 
+U+1C59 
+U+2160
+U+2161
+U+2162
+U+2163
+U+2164
+U+2165
+U+2166
+U+2167
+U+2168
+U+2169
+U+216A
+U+216B
+U+216C
+U+216D
+U+216E
+U+216F
+U+2170
+U+2171
+U+2172
+U+2173
+U+2174
+U+2175
+U+2176
+U+2177
+U+2178
+U+2179
+U+217A
+U+217B
+U+217C
+U+217D
+U+217E
+U+217F
+U+2180
+U+2181
+U+2182
+U+2183
+U+2184
+U+2185
+U+2186
+U+2187
+U+2188
+U+A620 
+U+A621 
+U+A622 
+U+A623 
+U+A624 
+U+A625 
+U+A626 
+U+A627 
+U+A628 
+U+A629 
+U+A8D0 
+U+A8D1 
+U+A8D2 
+U+A8D3 
+U+A8D4 
+U+A8D5 
+U+A8D6 
+U+A8D7 
+U+A8D8 
+U+A8D9 
+U+A900 
+U+A901 
+U+A902 
+U+A903 
+U+A904 
+U+A905 
+U+A906 
+U+A907 
+U+A908 
+U+A909 
+U+A9D0 
+U+A9D1 
+U+A9D2 
+U+A9D3 
+U+A9D4 
+U+A9D5 
+U+A9D6 
+U+A9D7 
+U+A9D8 
+U+A9D9 
+U+A9F0 
+U+A9F1 
+U+A9F2 
+U+A9F3 
+U+A9F4 
+U+A9F5 
+U+A9F6 
+U+A9F7 
+U+A9F8 
+U+A9F9 
+U+AA50 
+U+AA51 
+U+AA52 
+U+AA53 
+U+AA54 
+U+AA55 
+U+AA56 
+U+AA57 
+U+AA58 
+U+AA59 
+U+ABF0 
+U+ABF1 
+U+ABF2 
+U+ABF3 
+U+ABF4 
+U+ABF5 
+U+ABF6 
+U+ABF7 
+U+ABF8 
+U+ABF9 
+U+FF10 
+U+FF11 
+U+FF12 
+U+FF13 
+U+FF14 
+U+FF15 
+U+FF16 
+U+FF17 
+U+FF18 
+U+FF19 
+U+104A0
+U+104A1
+U+104A2
+U+104A3
+U+104A4
+U+104A5
+U+104A6
+U+104A7
+U+104A8
+U+104A9
+U+10D30
+U+10D31
+U+10D32
+U+10D33
+U+10D34
+U+10D35
+U+10D36
+U+10D37
+U+10D38
+U+10D39
+U+11066
+U+11067
+U+11068
+U+11069
+U+1106A
+U+1106B
+U+1106C
+U+1106D
+U+1106E
+U+1106F
+U+110F0
+U+110F1
+U+110F2
+U+110F3
+U+110F4
+U+110F5
+U+110F6
+U+110F7
+U+110F8
+U+110F9
+U+11136
+U+11137
+U+11138
+U+11139
+U+1113A
+U+1113B
+U+1113C
+U+1113D
+U+1113E
+U+1113F
+U+111D0
+U+111D1
+U+111D2
+U+111D3
+U+111D4
+U+111D5
+U+111D6
+U+111D7
+U+111D8
+U+111D9
+U+112F0
+U+112F1
+U+112F2
+U+112F3
+U+112F4
+U+112F5
+U+112F6
+U+112F7
+U+112F8
+U+112F9
+U+11450
+U+11451
+U+11452
+U+11453
+U+11454
+U+11455
+U+11456
+U+11457
+U+11458
+U+11459
+U+114D0
+U+114D1
+U+114D2
+U+114D3
+U+114D4
+U+114D5
+U+114D6
+U+114D7
+U+114D8
+U+114D9
+U+11650
+U+11651
+U+11652
+U+11653
+U+11654
+U+11655
+U+11656
+U+11657
+U+11658
+U+11659
+U+116C0
+U+116C1
+U+116C2
+U+116C3
+U+116C4
+U+116C5
+U+116C6
+U+116C7
+U+116C8
+U+116C9
+U+11730
+U+11731
+U+11732
+U+11733
+U+11734
+U+11735
+U+11736
+U+11737
+U+11738
+U+11739
+U+118E0
+U+118E1
+U+118E2
+U+118E3
+U+118E4
+U+118E5
+U+118E6
+U+118E7
+U+118E8
+U+118E9
+U+11950
+U+11951
+U+11952
+U+11953
+U+11954
+U+11955
+U+11956
+U+11957
+U+11958
+U+11959
+U+11C50
+U+11C51
+U+11C52
+U+11C53
+U+11C54
+U+11C55
+U+11C56
+U+11C57
+U+11C58
+U+11C59
+U+11D50
+U+11D51
+U+11D52
+U+11D53
+U+11D54
+U+11D55
+U+11D56
+U+11D57
+U+11D58
+U+11D59
+U+11DA0
+U+11DA1
+U+11DA2
+U+11DA3
+U+11DA4
+U+11DA5
+U+11DA6
+U+11DA7
+U+11DA8
+U+11DA9
+U+16A60
+U+16A61
+U+16A62
+U+16A63
+U+16A64
+U+16A65
+U+16A66
+U+16A67
+U+16A68
+U+16A69
+U+16AC0
+U+16AC1
+U+16AC2
+U+16AC3
+U+16AC4
+U+16AC5
+U+16AC6
+U+16AC7
+U+16AC8
+U+16AC9
+U+16B50
+U+16B51
+U+16B52
+U+16B53
+U+16B54
+U+16B55
+U+16B56
+U+16B57
+U+16B58
+U+16B59
+U+1D7CE
+U+1D7CF
+U+1D7D0
+U+1D7D1
+U+1D7D2
+U+1D7D3
+U+1D7D4
+U+1D7D5
+U+1D7D6
+U+1D7D7
+U+1D7D8
+U+1D7D9
+U+1D7DA
+U+1D7DB
+U+1D7DC
+U+1D7DD
+U+1D7DE
+U+1D7DF
+U+1D7E0
+U+1D7E1
+U+1D7E2
+U+1D7E3
+U+1D7E4
+U+1D7E5
+U+1D7E6
+U+1D7E7
+U+1D7E8
+U+1D7E9
+U+1D7EA
+U+1D7EB
+U+1D7EC
+U+1D7ED
+U+1D7EE
+U+1D7EF
+U+1D7F0
+U+1D7F1
+U+1D7F2
+U+1D7F3
+U+1D7F4
+U+1D7F5
+U+1D7F6
+U+1D7F7
+U+1D7F8
+U+1D7F9
+U+1D7FA
+U+1D7FB
+U+1D7FC
+U+1D7FD
+U+1D7FE
+U+1D7FF
+U+1E140
+U+1E141
+U+1E142
+U+1E143
+U+1E144
+U+1E145
+U+1E146
+U+1E147
+U+1E148
+U+1E149
+U+1E2F0
+U+1E2F1
+U+1E2F2
+U+1E2F3
+U+1E2F4
+U+1E2F5
+U+1E2F6
+U+1E2F7
+U+1E2F8
+U+1E2F9
+U+1E950
+U+1E951
+U+1E952
+U+1E953
+U+1E954
+U+1E955
+U+1E956
+U+1E957
+U+1E958
+U+1E959
+U+1FBF0
+U+1FBF1
+U+1FBF2
+U+1FBF3
+U+1FBF4
+U+1FBF5
+U+1FBF6
+U+1FBF7
+U+1FBF8
+U+1FBF9
diff --git a/level_0/f_utf/data/tests/codepoints/digit-all.txt b/level_0/f_utf/data/tests/codepoints/digit-all.txt

index 4f954bdaf4733d592b7342d02b254a487e973290..592386607492df8a6014ea30a35b303f4407bac6 100644 (file)
--- a/level_0/f_utf/data/tests/codepoints/digit-all.txt
+++ b/level_0/f_utf/data/tests/codepoints/digit-all.txt
@@ -1,183 +1,414 @@
-U+0030
-U+0031
-U+0032
-U+0033
-U+0034
-U+0035
-U+0036
-U+0037
-U+0038
-U+0039
-U+0660
-U+0661
-U+0662
-U+0663
-U+0664
-U+0665
-U+0666
-U+0667
-U+0668
-U+0669
-U+06F0
-U+06F1
-U+06F2
-U+06F3
-U+06F4
-U+06F5
-U+06F6
-U+06F7
-U+06F8
-U+06F9
-U+07C0
-U+07C1
-U+07C2
-U+07C3
-U+07C4
-U+07C5
-U+07C6
-U+07C7
-U+07C8
-U+07C9
-U+0966
-U+0967
-U+0968
-U+0969
-U+096A
-U+096B
-U+096C
-U+096D
-U+096E
-U+096F
-U+09E6
-U+09E7
-U+09E8
-U+09E9
-U+09EA
-U+09EB
-U+09EC
-U+09ED
-U+09EE
-U+09EF
-U+0A66
-U+0A67
-U+0A68
-U+0A69
-U+0A6A
-U+0A6B
-U+0A6C
-U+0A6D
-U+0A6E
-U+0A6F
-U+0AE6
-U+0AE7
-U+0AE8
-U+0AE9
-U+0AEA
-U+0AEB
-U+0AEC
-U+0AED
-U+0AEE
-U+0AEF
-U+0B66
-U+0B67
-U+0B68
-U+0B69
-U+0B6A
-U+0B6B
-U+0B6C
-U+0B6D
-U+0B6E
-U+0B6F
-U+0BE6
-U+0BE7
-U+0BE8
-U+0BE9
-U+0BEA
-U+0BEB
-U+0BEC
-U+0BED
-U+0BEE
-U+0BEF
-U+0C66
-U+0C67
-U+0C68
-U+0C69
-U+0C6A
-U+0C6B
-U+0C6C
-U+0C6D
-U+0C6E
-U+0C6F
-U+0CE6
-U+0CE7
-U+0CE8
-U+0CE9
-U+0CEA
-U+0CEB
-U+0CEC
-U+0CED
-U+0CEE
-U+0CEF
-U+0D66
-U+0D67
-U+0D68
-U+0D69
-U+0D6A
-U+0D6B
-U+0D6C
-U+0D6D
-U+0D6E
-U+0D6F
-U+0DE6
-U+0DE7
-U+0DE8
-U+0DE9
-U+0DEA
-U+0DEB
-U+0DEC
-U+0DED
-U+0DEE
-U+0DEF
-U+0E50
-U+0E51
-U+0E52
-U+0E53
-U+0E54
-U+0E55
-U+0E56
-U+0E57
-U+0E58
-U+0E59
-U+0ED0
-U+0ED1
-U+0ED2
-U+0ED3
-U+0ED4
-U+0ED5
-U+0ED6
-U+0ED7
-U+0ED8
-U+0ED9
-U+0F20
-U+0F21
-U+0F22
-U+0F23
-U+0F24
-U+0F25
-U+0F26
-U+0F27
-U+0F28
-U+0F29
-U+1040
-U+1041
-U+1042
-U+1043
-U+1044
-U+1045
-U+1046
-U+1047
-U+1048
-U+1049
+U+0030 
+U+0031 
+U+0032 
+U+0033 
+U+0034 
+U+0035 
+U+0036 
+U+0037 
+U+0038 
+U+0039 
+U+0660 
+U+0661 
+U+0662 
+U+0663 
+U+0664 
+U+0665 
+U+0666 
+U+0667 
+U+0668 
+U+0669 
+U+06F0 
+U+06F1 
+U+06F2 
+U+06F3 
+U+06F4 
+U+06F5 
+U+06F6 
+U+06F7 
+U+06F8 
+U+06F9 
+U+07C0 
+U+07C1 
+U+07C2 
+U+07C3 
+U+07C4 
+U+07C5 
+U+07C6 
+U+07C7 
+U+07C8 
+U+07C9 
+U+0966 
+U+0967 
+U+0968 
+U+0969 
+U+096A 
+U+096B 
+U+096C 
+U+096D 
+U+096E 
+U+096F 
+U+09E6 
+U+09E7 
+U+09E8 
+U+09E9 
+U+09EA 
+U+09EB 
+U+09EC 
+U+09ED 
+U+09EE 
+U+09EF 
+U+0A66 
+U+0A67 
+U+0A68 
+U+0A69 
+U+0A6A 
+U+0A6B 
+U+0A6C 
+U+0A6D 
+U+0A6E 
+U+0A6F 
+U+0AE6 
+U+0AE7 
+U+0AE8 
+U+0AE9 
+U+0AEA 
+U+0AEB 
+U+0AEC 
+U+0AED 
+U+0AEE 
+U+0AEF 
+U+0B66 
+U+0B67 
+U+0B68 
+U+0B69 
+U+0B6A 
+U+0B6B 
+U+0B6C 
+U+0B6D 
+U+0B6E 
+U+0B6F 
+U+0BE6 
+U+0BE7 
+U+0BE8 
+U+0BE9 
+U+0BEA 
+U+0BEB 
+U+0BEC 
+U+0BED 
+U+0BEE 
+U+0BEF 
+U+0C66 
+U+0C67 
+U+0C68 
+U+0C69 
+U+0C6A 
+U+0C6B 
+U+0C6C 
+U+0C6D 
+U+0C6E 
+U+0C6F 
+U+0CE6 
+U+0CE7 
+U+0CE8 
+U+0CE9 
+U+0CEA 
+U+0CEB 
+U+0CEC 
+U+0CED 
+U+0CEE 
+U+0CEF 
+U+0D66 
+U+0D67 
+U+0D68 
+U+0D69 
+U+0D6A 
+U+0D6B 
+U+0D6C 
+U+0D6D 
+U+0D6E 
+U+0D6F 
+U+0DE6 
+U+0DE7 
+U+0DE8 
+U+0DE9 
+U+0DEA 
+U+0DEB 
+U+0DEC 
+U+0DED 
+U+0DEE 
+U+0DEF 
+U+0E50 
+U+0E51 
+U+0E52 
+U+0E53 
+U+0E54 
+U+0E55 
+U+0E56 
+U+0E57 
+U+0E58 
+U+0E59 
+U+0ED0 
+U+0ED1 
+U+0ED2 
+U+0ED3 
+U+0ED4 
+U+0ED5 
+U+0ED6 
+U+0ED7 
+U+0ED8 
+U+0ED9 
+U+0F20 
+U+0F21 
+U+0F22 
+U+0F23 
+U+0F24 
+U+0F25 
+U+0F26 
+U+0F27 
+U+0F28 
+U+0F29 
+U+1040 
+U+1041 
+U+1042 
+U+1043 
+U+1044 
+U+1045 
+U+1046 
+U+1047 
+U+1048 
+U+1049 
+U+1090 
+U+1091 
+U+1092 
+U+1093 
+U+1094 
+U+1095 
+U+1096 
+U+1097 
+U+1098 
+U+1099 
+U+17E0 
+U+17E1 
+U+17E2 
+U+17E3 
+U+17E4 
+U+17E5 
+U+17E6 
+U+17E7 
+U+17E8 
+U+17E9 
+U+1810 
+U+1811 
+U+1812 
+U+1813 
+U+1814 
+U+1815 
+U+1816 
+U+1817 
+U+1818 
+U+1819 
+U+1946 
+U+1947 
+U+1948 
+U+1949 
+U+194A 
+U+194B 
+U+194C 
+U+194D 
+U+194E 
+U+194F 
+U+19D0 
+U+19D1 
+U+19D2 
+U+19D3 
+U+19D4 
+U+19D5 
+U+19D6 
+U+19D7 
+U+19D8 
+U+19D9 
+U+1A80 
+U+1A81 
+U+1A82 
+U+1A83 
+U+1A84 
+U+1A85 
+U+1A86 
+U+1A87 
+U+1A88 
+U+1A89 
+U+1A90 
+U+1A91 
+U+1A92 
+U+1A93 
+U+1A94 
+U+1A95 
+U+1A96 
+U+1A97 
+U+1A98 
+U+1A99 
+U+1B50 
+U+1B51 
+U+1B52 
+U+1B53 
+U+1B54 
+U+1B55 
+U+1B56 
+U+1B57 
+U+1B58 
+U+1B59 
+U+1BB0 
+U+1BB1 
+U+1BB2 
+U+1BB3 
+U+1BB4 
+U+1BB5 
+U+1BB6 
+U+1BB7 
+U+1BB8 
+U+1BB9 
+U+1C40 
+U+1C41 
+U+1C42 
+U+1C43 
+U+1C44 
+U+1C45 
+U+1C46 
+U+1C47 
+U+1C48 
+U+1C49 
+U+1C50 
+U+1C51 
+U+1C52 
+U+1C53 
+U+1C54 
+U+1C55 
+U+1C56 
+U+1C57 
+U+1C58 
+U+1C59 
+U+2160
+U+2161
+U+2162
+U+2163
+U+2164
+U+2165
+U+2166
+U+2167
+U+2168
+U+2169
+U+216A
+U+216B
+U+216C
+U+216D
+U+216E
+U+216F
+U+2170
+U+2171
+U+2172
+U+2173
+U+2174
+U+2175
+U+2176
+U+2177
+U+2178
+U+2179
+U+217A
+U+217B
+U+217C
+U+217D
+U+217E
+U+217F
+U+2180
+U+2181
+U+2182
+U+2183
+U+2184
+U+2185
+U+2186
+U+2187
+U+2188
+U+A620 
+U+A621 
+U+A622 
+U+A623 
+U+A624 
+U+A625 
+U+A626 
+U+A627 
+U+A628 
+U+A629 
+U+A8D0 
+U+A8D1 
+U+A8D2 
+U+A8D3 
+U+A8D4 
+U+A8D5 
+U+A8D6 
+U+A8D7 
+U+A8D8 
+U+A8D9 
+U+A900 
+U+A901 
+U+A902 
+U+A903 
+U+A904 
+U+A905 
+U+A906 
+U+A907 
+U+A908 
+U+A909 
+U+A9D0 
+U+A9D1 
+U+A9D2 
+U+A9D3 
+U+A9D4 
+U+A9D5 
+U+A9D6 
+U+A9D7 
+U+A9D8 
+U+A9D9 
+U+A9F0 
+U+A9F1 
+U+A9F2 
+U+A9F3 
+U+A9F4 
+U+A9F5 
+U+A9F6 
+U+A9F7 
+U+A9F8 
+U+A9F9 
+U+AA50 
+U+AA51 
+U+AA52 
+U+AA53 
+U+AA54 
+U+AA55 
+U+AA56 
+U+AA57 
+U+AA58 
+U+AA59 
+U+ABF0 
+U+ABF1 
+U+ABF2 
+U+ABF3 
+U+ABF4 
+U+ABF5 
+U+ABF6 
+U+ABF7 
+U+ABF8 
+U+ABF9 
+U+FF10 
+U+FF11 
+U+FF12 
+U+FF13 
+U+FF14 
+U+FF15 
+U+FF16 
+U+FF17 
+U+FF18 
+U+FF19 
  U+104A0
  U+104A1
  U+104A2
@@ -188,16 +419,6 @@ U+104A6
  U+104A7
  U+104A8
  U+104A9
-U+1090
-U+1091
-U+1092
-U+1093
-U+1094
-U+1095
-U+1096
-U+1097
-U+1098
-U+1099
  U+10D30
  U+10D31
  U+10D32
@@ -368,6 +589,16 @@ U+16A66
  U+16A67
  U+16A68
  U+16A69
+U+16AC0
+U+16AC1
+U+16AC2
+U+16AC3
+U+16AC4
+U+16AC5
+U+16AC6
+U+16AC7
+U+16AC8
+U+16AC9
  U+16B50
  U+16B51
  U+16B52
@@ -378,106 +609,6 @@ U+16B56
  U+16B57
  U+16B58
  U+16B59
-U+17E0
-U+17E1
-U+17E2
-U+17E3
-U+17E4
-U+17E5
-U+17E6
-U+17E7
-U+17E8
-U+17E9
-U+1810
-U+1811
-U+1812
-U+1813
-U+1814
-U+1815
-U+1816
-U+1817
-U+1818
-U+1819
-U+1946
-U+1947
-U+1948
-U+1949
-U+194A
-U+194B
-U+194C
-U+194D
-U+194E
-U+194F
-U+19D0
-U+19D1
-U+19D2
-U+19D3
-U+19D4
-U+19D5
-U+19D6
-U+19D7
-U+19D8
-U+19D9
-U+1A80
-U+1A81
-U+1A82
-U+1A83
-U+1A84
-U+1A85
-U+1A86
-U+1A87
-U+1A88
-U+1A89
-U+1A90
-U+1A91
-U+1A92
-U+1A93
-U+1A94
-U+1A95
-U+1A96
-U+1A97
-U+1A98
-U+1A99
-U+1B50
-U+1B51
-U+1B52
-U+1B53
-U+1B54
-U+1B55
-U+1B56
-U+1B57
-U+1B58
-U+1B59
-U+1BB0
-U+1BB1
-U+1BB2
-U+1BB3
-U+1BB4
-U+1BB5
-U+1BB6
-U+1BB7
-U+1BB8
-U+1BB9
-U+1C40
-U+1C41
-U+1C42
-U+1C43
-U+1C44
-U+1C45
-U+1C46
-U+1C47
-U+1C48
-U+1C49
-U+1C50
-U+1C51
-U+1C52
-U+1C53
-U+1C54
-U+1C55
-U+1C56
-U+1C57
-U+1C58
-U+1C59
  U+1D7CE
  U+1D7CF
  U+1D7D0
@@ -568,83 +699,3 @@ U+1FBF6
  U+1FBF7
  U+1FBF8
  U+1FBF9
-U+A620
-U+A621
-U+A622
-U+A623
-U+A624
-U+A625
-U+A626
-U+A627
-U+A628
-U+A629
-U+A8D0
-U+A8D1
-U+A8D2
-U+A8D3
-U+A8D4
-U+A8D5
-U+A8D6
-U+A8D7
-U+A8D8
-U+A8D9
-U+A900
-U+A901
-U+A902
-U+A903
-U+A904
-U+A905
-U+A906
-U+A907
-U+A908
-U+A909
-U+A9D0
-U+A9D1
-U+A9D2
-U+A9D3
-U+A9D4
-U+A9D5
-U+A9D6
-U+A9D7
-U+A9D8
-U+A9D9
-U+A9F0
-U+A9F1
-U+A9F2
-U+A9F3
-U+A9F4
-U+A9F5
-U+A9F6
-U+A9F7
-U+A9F8
-U+A9F9
-U+AA50
-U+AA51
-U+AA52
-U+AA53
-U+AA54
-U+AA55
-U+AA56
-U+AA57
-U+AA58
-U+AA59
-U+ABF0
-U+ABF1
-U+ABF2
-U+ABF3
-U+ABF4
-U+ABF5
-U+ABF6
-U+ABF7
-U+ABF8
-U+ABF9
-U+FF10
-U+FF11
-U+FF12
-U+FF13
-U+FF14
-U+FF15
-U+FF16
-U+FF17
-U+FF18
-U+FF19
diff --git a/level_0/f_utf/data/tests/values/decimal-all.txt b/level_0/f_utf/data/tests/values/decimal-all.txt

new file mode 100644 (file)

index 0000000..c647e79
--- /dev/null
+++ b/level_0/f_utf/data/tests/values/decimal-all.txt
@@ -0,0 +1,701 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+50
+100
+500
+1000
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+50
+100
+500
+1000
+1000
+5000
+10000
+100
+100
+6
+50
+50000
+100000
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
diff --git a/level_0/f_utf/tests/unit/c/data-utf.c b/level_0/f_utf/tests/unit/c/data-utf.c

index 0f837825259124291a5cb7d90061f0f3233b2f03..88755bd31866ff8d7f3a7f294ee19079b80a61c2 100644 (file)
--- a/level_0/f_utf/tests/unit/c/data-utf.c
+++ b/level_0/f_utf/tests/unit/c/data-utf.c
@@ -19,6 +19,11 @@ FILE *data__bytesequence_file_open__control(void) {
    return fopen("./data/tests/bytesequences/control-all.txt", "r");
  }
  
+FILE *data__bytesequence_file_open__decimal(void) {
+  // Open read-only; the relative path resolves against the current working directory.
+  return fopen("./data/tests/bytesequences/decimal-all.txt", "r");
+}
+
  FILE *data__bytesequence_file_open__digit(void) {
  
    return fopen("./data/tests/bytesequences/digit-all.txt", "r");
@@ -132,6 +137,29 @@ ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const char
    return bytes;
  }
  
+FILE *data__value_file_open__decimal(void) {
+  // Open read-only; the relative path resolves against the current working directory.
+  return fopen("./data/tests/values/decimal-all.txt", "r");
+}
+
+ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value) {
+  // Reads one line from "file" and stores its base-10 number in "value"; returns what getline() returns.
+  size_t length = 0;
+  char *line = 0;
+  // With a NULL buffer and zero length, getline() allocates the line buffer itself.
+  const ssize_t bytes = getline(&line, &length, file);
+  // Only touch "value" when a line was actually read.
+  if (bytes > 0) {
+    *value = (uint32_t) atol(line); // atol() stops at the first non-digit, so the trailing newline is ignored.
+  }
+  // The buffer is local to this function, so it is released here before returning.
+  if (line) {
+    free(line);
+  }
+
+  return bytes;
+}
+
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_0/f_utf/tests/unit/c/data-utf.h b/level_0/f_utf/tests/unit/c/data-utf.h

index 31b7c506e60168aafa5d7d0b4b8bae19e3d45357..b1ee5336b8a569a7e27e02bc72190bac866e7eae 100644 (file)
--- a/level_0/f_utf/tests/unit/c/data-utf.h
+++ b/level_0/f_utf/tests/unit/c/data-utf.h
@@ -68,6 +68,21 @@ extern FILE *data__bytesequence_file_open__combining(void);
  extern FILE *data__bytesequence_file_open__control(void);
  
  /**
+ * Open the "decimal" bytesequence file.
+ *
+ * This assumes the following:
+ * - The file path is relative to the current working directory (tests are run from project root).
+ * - The file path is "data/tests/bytesequences/decimal-all.txt".
+ *
+ * @return
+ *   Non-zero on success.
+ *   0 on failure.
+ *
+ * @see fopen()
+ */
+extern FILE *data__bytesequence_file_open__decimal(void);
+
+/**
   * Open the "digit" bytesequence file.
   *
   * This assumes the following:
@@ -286,7 +301,7 @@ extern FILE *data__bytesequence_file_open__zero_width(void);
   *
   * This should handle converting the number between big and little endian as needed.
   *
- * The input file is expected to be in base-10 so that existing standarrd functions like atoll() can be easily used.
+ * The input file is expected to be in base-10 so that existing standard functions like atoll() can be easily used.
   *
   * @param file
   *   The file stream.
@@ -300,10 +315,47 @@ extern FILE *data__bytesequence_file_open__zero_width(void);
   *
   * @see atoll()
   * @see getline()
- * @see htonl()
   */
  extern ssize_t data__bytesequence_get_line(FILE * const file, f_utf_char_t * const character);
  
+/**
+ * Open the "decimal" values file.
+ *
+ * This assumes the following:
+ * - The file path is relative to the current working directory (tests are run from project root).
+ * - The file path is "data/tests/values/decimal-all.txt".
+ *
+ * @return
+ *   Non-zero on success.
+ *   0 on failure.
+ *
+ * @see fopen()
+ */
+extern FILE *data__value_file_open__decimal(void);
+
+/**
+ * Simple line reader that converts the line into an unsigned 32-bit number.
+ *
+ * This assumes the following:
+ * - The line only contains base-10 digits as ASCII characters.
+ *
+ * The input file is expected to be in base-10 so that existing standard functions like atol() can be easily used.
+ *
+ * @param file
+ *   The file stream.
+ * @param value
+ *   The number read from the file at the current line in the stream.
+ *
+ * @return
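+ *   Positive number on success, where the number represents the bytes read.
+ *   -1 when the end of file is reached or on failure (the return value of getline() is passed through).
+ *   The value parameter is only written when a line is read.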
+ *
+ * @see atol()
+ * @see getline()
+ */
+extern ssize_t data__value_get_line_long_long(FILE * const file, uint32_t * const value);
+
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c

new file mode 100644 (file)

index 0000000..8c2efed
--- /dev/null
+++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c
@@ -0,0 +1,47 @@
+#include "test-utf.h"
+#include "test-utf-character_is_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void test__f_utf_character_is_decimal__works(void **state) {
+  // Walk the byte sequence file and the values file in lock-step, one line from each per iteration.
+  {
+    FILE *file = data__bytesequence_file_open__decimal();
+    FILE *file_number = data__value_file_open__decimal();
+
+    assert_non_null(file);
+    assert_non_null(file_number);
+
+    f_utf_char_t sequence = 0;
+    ssize_t bytes = 0;
+    ssize_t bytes_number = 0;
+    uint32_t number = 0;
+    f_array_length_t line = 0; // Counts processed lines; never read by the test itself.
+
+    do {
+      bytes = data__bytesequence_get_line(file, &sequence);
+      bytes_number = data__value_get_line_long_long(file_number, &number);
+      // Compare only when both files produced a line.
+      if (bytes > 0 && bytes_number > 0) {
+        uint32_t value = F_type_size_max_32_unsigned_d;
+        // The character must be reported as a decimal and yield the number from the same line of the values file.
+        const f_status_t status = f_utf_character_is_decimal(sequence, &value);
+
+        assert_int_equal(status, F_true);
+        assert_int_equal(value, number);
+      }
+
+      ++line;
+      // NOTE(review): the loop ends as soon as either file runs out, so a length mismatch between the files is not detected.
+    } while (bytes > 0 && bytes_number > 0);
+
+    fclose(file);
+    fclose(file_number);
+  }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h

new file mode 100644 (file)

index 0000000..326307e
--- /dev/null
+++ b/level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h
@@ -0,0 +1,20 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Test the function in the utf project.
+ */
+#ifndef _TEST__F_utf_character_is_decimal_h
+#define _TEST__F_utf_character_is_decimal_h
+
+/**
+ * Test that the function works.
+ *
+ * @see f_utf_character_is_decimal()
+ */
+extern void test__f_utf_character_is_decimal__works(void **state);
+
+#endif // _TEST__F_utf_character_is_decimal_h
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c

index c96ec849040127bdb46db463df7b5b249958d19d..dd00bfb4a598ed2139b73cbdf8854d3aa4d60869 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c
@@ -20,7 +20,7 @@ void test__f_utf_is_alphabetic__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c

index 9173f1024d0552e2381e09549bc5e0a30208df01..0c0a63a8d1f26e036a0f8346d6b3133a0648ab79 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_combining.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_combining.c
@@ -20,7 +20,7 @@ void test__f_utf_is_combining__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_control.c b/level_0/f_utf/tests/unit/c/test-utf-is_control.c

index 5d8189ee8d1e679efd4ffdbb0940ab4afc1fc3a3..43087b877e88affd50f0d08a5f97d6ceee1d0076 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_control.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_control.c
@@ -20,7 +20,7 @@ void test__f_utf_is_control__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c

new file mode 100644 (file)

index 0000000..53e1ba0
--- /dev/null
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.c
@@ -0,0 +1,64 @@
+#include "test-utf.h"
+#include "test-utf-is_decimal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void test__f_utf_is_decimal__works(void **state) {
+  // Walk the byte sequence file and the values file in lock-step, one line from each per iteration.
+  {
+    FILE *file = data__bytesequence_file_open__decimal();
+    FILE *file_number = data__value_file_open__decimal();
+
+    assert_non_null(file);
+    assert_non_null(file_number);
+
+    f_utf_char_t sequence = 0;
+    ssize_t bytes = 0;
+    ssize_t bytes_number = 0;
+    uint32_t number = 0;
+    f_array_length_t line = 0; // Counts processed lines; never read by the test itself.
+
+    do {
+      bytes = data__bytesequence_get_line(file, &sequence);
+      bytes_number = data__value_get_line_long_long(file_number, &number);
+      // Compare only when both files produced a line.
+      if (bytes > 0 && bytes_number > 0) {
+        const uint8_t width = macro_f_utf_char_t_width(sequence);
+        char buffer[5] = { 0, 0, 0, 0, 0 };
+        // Copy as many characters out of the sequence as its width indicates; unused buffer entries stay 0.
+        buffer[0] = macro_f_utf_char_t_to_char_1(sequence);
+
+        if (width > 1) {
+          buffer[1] = macro_f_utf_char_t_to_char_2(sequence);
+
+          if (width > 2) {
+            buffer[2] = macro_f_utf_char_t_to_char_3(sequence);
+
+            if (width > 3) {
+              buffer[3] = macro_f_utf_char_t_to_char_4(sequence);
+            }
+          }
+        }
+
+        uint32_t value = F_type_size_max_32_unsigned_d;
+        // The full buffer size (5) is passed rather than the character width.
+        const f_status_t status = f_utf_is_decimal(buffer, 5, &value);
+
+        assert_int_equal(status, F_true);
+        assert_int_equal(value, number);
+      }
+
+      ++line;
+      // NOTE(review): the loop ends as soon as either file runs out, so a length mismatch between the files is not detected.
+    } while (bytes > 0 && bytes_number > 0);
+
+    fclose(file);
+    fclose(file_number);
+  }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h

new file mode 100644 (file)

index 0000000..3d795ee
--- /dev/null
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_decimal.h
@@ -0,0 +1,20 @@
+/**
+ * FLL - Level 0
+ *
+ * Project: UTF
+ * API Version: 0.5
+ * Licenses: lgpl-2.1-or-later
+ *
+ * Test the function in the utf project.
+ */
+#ifndef _TEST__F_utf_is_decimal_h
+#define _TEST__F_utf_is_decimal_h
+
+/**
+ * Test that the function works.
+ *
+ * @see f_utf_is_decimal()
+ */
+extern void test__f_utf_is_decimal__works(void **state);
+
+#endif // _TEST__F_utf_is_decimal_h
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c

index 5db8d58c70627296ebf8fe1e222a55ef73430140..37683e8636276acd50bc5eceb83ca27bab9596b7 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_digit.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_digit.c
@@ -20,7 +20,7 @@ void test__f_utf_is_digit__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c

index 054eaf407267a8b98c09c6e464aa200716bc0067..014d35064c7102f6d7be2f304af71cca15b6f604 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_emoji.c
@@ -20,7 +20,7 @@ void test__f_utf_is_emoji__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c

index 5801e8147b0e65cbbbd49745abda072e566ef475..c3974860cfbd4e29b07d240daa7e34e17524995f 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_numeric.c
@@ -20,7 +20,7 @@ void test__f_utf_is_numeric__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c

index 88902b2021cba06f892151efc7fd4a42c9471b55..4e2b3aaae4155f99c4d673c2fdf8f852b5a31016 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c
@@ -20,7 +20,7 @@ void test__f_utf_is_phonetic__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_private.c b/level_0/f_utf/tests/unit/c/test-utf-is_private.c

index b5fdbef1cd341835b3fb45a6462db22f3843e0d1..e9d5f522766ad609fc3cbbdc548e6622a3cfde19 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_private.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_private.c
@@ -20,7 +20,7 @@ void test__f_utf_is_private__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c

index 54166b27ece29482382de66aeb89beee4f303419..ca7f9e2f9be021ba77a575bc509e98add89145ff 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c
@@ -20,7 +20,7 @@ void test__f_utf_is_punctuation__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c

index e04d93feb466887795fdf14e2ecd873d4a9018c6..492493181a84df83713da84ee1a01c3076c8e8c9 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_subscript.c
@@ -20,7 +20,7 @@ void test__f_utf_is_subscript__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c

index 2fec11b076bccec680c305b4374fc974785eb2e5..9180f95db946a55464ddf18a5978b8350791a5d4 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_superscript.c
@@ -20,7 +20,7 @@ void test__f_utf_is_superscript__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c

index cac93b65da51a749a7f2928df0a174e66cf99565..0646b0b0edc30159af92c7b05684978eaf2884d5 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_symbol.c
@@ -20,7 +20,7 @@ void test__f_utf_is_symbol__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c

index c44972788a83a24254a9780d75a58445e430466c..eede6039acf1da2631edb6de01b9dc64856608db 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c
@@ -20,7 +20,7 @@ void test__f_utf_is_whitespace__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c

index d2efc6f91fb01cbdf5905a1d7126d62759c61260..ddab706e6ce9e3579a910fa33b3f6c1e30c17b34 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_wide.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_wide.c
@@ -20,7 +20,7 @@ void test__f_utf_is_wide__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_word.c b/level_0/f_utf/tests/unit/c/test-utf-is_word.c

index f9d5e0bbce2232db108562aeb9ae506df8e46fde..14bef6abb759ec41a21c5d374749bb619088f61b 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_word.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_word.c
@@ -20,7 +20,7 @@ void test__f_utf_is_word__strict_is_false(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
@@ -67,7 +67,7 @@ void test__f_utf_is_word__strict_is_true(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c

index 00f6329aef122d0e176965faa16bc9b6552d7f5c..a3c183e67e8f436abefd7174dcd05a2c7eef486d 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c
+++ b/level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c
@@ -20,7 +20,7 @@ void test__f_utf_is_zero_width__works(void **state) {
      do {
        bytes = data__bytesequence_get_line(file, &sequence);
  
-      if (bytes) {
+      if (bytes > 0) {
          const uint8_t width = macro_f_utf_char_t_width(sequence);
          char buffer[5] = { 0, 0, 0, 0, 0 };
  
diff --git a/level_0/f_utf/tests/unit/c/test-utf.c b/level_0/f_utf/tests/unit/c/test-utf.c

index 3661bdba75dcd37ae0c9e2164d63c84bba59d247..b5e80795d91e681952014635b3723670e29dc569 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf.c
+++ b/level_0/f_utf/tests/unit/c/test-utf.c
@@ -25,8 +25,12 @@ int main(void) {
      cmocka_unit_test(test__f_utf_append_nulless__works),
  
      //cmocka_unit_test(test__f_utf_character_is_alphabetic__works),
+    //cmocka_unit_test(test__f_utf_character_is_alphabetic_decimal__works),
+    //cmocka_unit_test(test__f_utf_character_is_alphabetic_digit__works),
+    //cmocka_unit_test(test__f_utf_character_is_alphabetic_numeric__works),
      cmocka_unit_test(test__f_utf_character_is_combining__works),
      cmocka_unit_test(test__f_utf_character_is_control__works),
+    cmocka_unit_test(test__f_utf_character_is_decimal__works),
      cmocka_unit_test(test__f_utf_character_is_digit__works),
      cmocka_unit_test(test__f_utf_character_is_emoji__works),
      //cmocka_unit_test(test__f_utf_character_is_numeric__works),
@@ -133,8 +137,12 @@ int main(void) {
      cmocka_unit_test(test__f_utf_dynamicss_resize__works),
  
      //cmocka_unit_test(test__f_utf_is_alphabetic__works),
+    //cmocka_unit_test(test__f_utf_is_alphabetic_decimal__works),
+    //cmocka_unit_test(test__f_utf_is_alphabetic_digit__works),
+    //cmocka_unit_test(test__f_utf_is_alphabetic_numeric__works),
      cmocka_unit_test(test__f_utf_is_combining__works),
      cmocka_unit_test(test__f_utf_is_control__works),
+    cmocka_unit_test(test__f_utf_is_decimal__works),
      cmocka_unit_test(test__f_utf_is_digit__works),
      cmocka_unit_test(test__f_utf_is_emoji__works),
      //cmocka_unit_test(test__f_utf_is_numeric__works),
diff --git a/level_0/f_utf/tests/unit/c/test-utf.h b/level_0/f_utf/tests/unit/c/test-utf.h

index 8deda57a049c5b961c01e4154e89451f337ae2e5..a8069e96fbb5fa96d19ffc082e4cbf1487bf67d6 100644 (file)
--- a/level_0/f_utf/tests/unit/c/test-utf.h
+++ b/level_0/f_utf/tests/unit/c/test-utf.h
@@ -36,6 +36,7 @@
  #include "test-utf-character_is_alphabetic.h"
  #include "test-utf-character_is_combining.h"
  #include "test-utf-character_is_control.h"
+#include "test-utf-character_is_decimal.h"
  #include "test-utf-character_is_digit.h"
  #include "test-utf-character_is_emoji.h"
  #include "test-utf-character_is_numeric.h"
@@ -104,6 +105,7 @@
  #include "test-utf-is_alphabetic.h"
  #include "test-utf-is_combining.h"
  #include "test-utf-is_control.h"
+#include "test-utf-is_decimal.h"
  #include "test-utf-is_digit.h"
  #include "test-utf-is_emoji.h"
  #include "test-utf-is_numeric.h"
diff --git a/level_3/controller/c/controller/private-controller.c b/level_3/controller/c/controller/private-controller.c

index 36be1b1df196e1f57115b852b13bfed2e5af4a34..ce50b9a23e7588c0137d756dda8b620590d40a38 100644 (file)
--- a/level_3/controller/c/controller/private-controller.c
+++ b/level_3/controller/c/controller/private-controller.c
@@ -786,7 +786,7 @@ extern "C" {
  
        if (name.string[i] == '_') continue;
  
-      status = f_utf_is_alphabetic_digit(name.string, name.used);
+      status = f_utf_is_alphabetic_decimal(name.string, name.used);
  
        if (F_status_is_error(status)) return status;
        if (status == F_false) return F_false;
diff --git a/level_3/controller/c/controller/private-controller.h b/level_3/controller/c/controller/private-controller.h

index a8531514f8a137697c64f6ac7f47887ebe8c622c..618b113ef86c9c2ab6e179766691a335d4ec5e8a 100644 (file)
--- a/level_3/controller/c/controller/private-controller.h
+++ b/level_3/controller/c/controller/private-controller.h
@@ -381,10 +381,10 @@ extern "C" {
   *   F_none if there is no string to validate (used = 0).
   *
   *   Errors (with error bit) from: f_utf_is_alphabetic().
- *   Errors (with error bit) from: f_utf_is_alphabetic_digit().
+ *   Errors (with error bit) from: f_utf_is_alphabetic_decimal().
   *
   * @see f_utf_is_alphabetic()
- * @see f_utf_is_alphabetic_digit()
+ * @see f_utf_is_alphabetic_decimal()
   */
  #ifndef _di_controller_validate_define_name_
    extern f_status_t controller_validate_environment_name(const f_string_static_t name) F_attribute_visibility_internal_d;
author	Kevin Day <thekevinday@gmail.com>
	Mon, 20 Jun 2022 04:42:18 +0000 (23:42 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Mon, 20 Jun 2022 05:04:15 +0000 (00:04 -0500)
build/level_0/settings		patch \| blob \| history
build/monolithic/settings		patch \| blob \| history
level_0/f_utf/c/private-utf.c		patch \| blob \| history
level_0/f_utf/c/private-utf.h		patch \| blob \| history
level_0/f_utf/c/private-utf_alphabetic.c		patch \| blob \| history
level_0/f_utf/c/private-utf_alphabetic.h		patch \| blob \| history
level_0/f_utf/c/private-utf_decimal.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/private-utf_decimal.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/c/private-utf_digit.c		patch \| blob \| history
level_0/f_utf/c/private-utf_word.c		patch \| blob \| history
level_0/f_utf/c/utf/common.h		patch \| blob \| history
level_0/f_utf/c/utf/is.c		patch \| blob \| history
level_0/f_utf/c/utf/is.h		patch \| blob \| history
level_0/f_utf/c/utf/is_character.c		patch \| blob \| history
level_0/f_utf/c/utf/is_character.h		patch \| blob \| history
level_0/f_utf/data/build/settings		patch \| blob \| history
level_0/f_utf/data/build/settings-tests		patch \| blob \| history
level_0/f_utf/data/tests/bytesequences/decimal-all.txt	[new file with mode: 0644]	patch \| blob
level_0/f_utf/data/tests/bytesequences/digit-all.txt		patch \| blob \| history
level_0/f_utf/data/tests/codepoints/decimal-all.txt	[new file with mode: 0644]	patch \| blob
level_0/f_utf/data/tests/codepoints/digit-all.txt		patch \| blob \| history
level_0/f_utf/data/tests/values/decimal-all.txt	[new file with mode: 0644]	patch \| blob
level_0/f_utf/tests/unit/c/data-utf.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/data-utf.h		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/tests/unit/c/test-utf-character_is_decimal.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/tests/unit/c/test-utf-is_alphabetic.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_combining.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_control.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_decimal.c	[new file with mode: 0644]	patch \| blob
level_0/f_utf/tests/unit/c/test-utf-is_decimal.h	[new file with mode: 0644]	patch \| blob
level_0/f_utf/tests/unit/c/test-utf-is_digit.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_emoji.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_numeric.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_phonetic.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_private.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_punctuation.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_subscript.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_superscript.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_symbol.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_whitespace.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_wide.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_word.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf-is_zero_width.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf.c		patch \| blob \| history
level_0/f_utf/tests/unit/c/test-utf.h		patch \| blob \| history
level_3/controller/c/controller/private-controller.c		patch \| blob \| history
level_3/controller/c/controller/private-controller.h		patch \| blob \| history