Update: The f_utf_is_* functions should be more specific on return state for F_failure.

author Kevin Day <thekevinday@gmail.com>

Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)

committer Kevin Day <thekevinday@gmail.com>

Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)
author Kevin Day <thekevinday@gmail.com>
Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)
committer Kevin Day <thekevinday@gmail.com>
Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)
diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c

index 5ba9cd99310f2c8f67ad20e939aef141b17d4da7..300c7fb4dfa1e0ef7749fa7722924032d9f5bae0 100644 (file)
--- a/level_0/f_utf/c/private-utf.c
+++ b/level_0/f_utf/c/private-utf.c
@@ -19,7 +19,7 @@ extern "C" {
      }
  
      if (macro_f_utf_byte_width_is(*character) > width_max) {
-      return F_status_set_error(F_failure);
+      return F_status_set_error(F_complete_not_utf);
      }
  
      *character_utf = macro_f_utf_char_t_from_char_1(character[0]);
diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h

index 379e6ad589a9c6309873f2644c80f41c14d903b1..88e1860aae6776c3983393bb90d9066e551b8cdb 100644 (file)
--- a/level_0/f_utf/c/private-utf.h
+++ b/level_0/f_utf/c/private-utf.h
@@ -35,7 +35,7 @@ extern "C" {
   * @return
   *   F_none if conversion was successful.
   *
- *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_complete_not_utf (with error bit) if character is an incomplete UTF-8 sequence.
   *   F_parameter (with error bit) if a parameter is invalid.
   *   F_utf (with error bit) if unicode is an invalid Unicode character.
   *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c

index 2a63837432115a910dfa40940eb7ef5caff0b957..cedb9f888964986c6675be12796422dce8a3ffb1 100644 (file)
--- a/level_0/f_utf/c/utf/is.c
+++ b/level_0/f_utf/c/utf/is.c
@@ -21,7 +21,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -54,7 +54,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -87,7 +87,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -120,7 +120,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -142,7 +142,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -172,7 +172,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -201,7 +201,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -234,7 +234,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -264,7 +264,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -298,7 +298,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -331,7 +331,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -375,7 +375,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -421,7 +421,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -454,7 +454,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -484,7 +484,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -514,7 +514,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -573,7 +573,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -617,7 +617,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -647,7 +647,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -677,7 +677,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -707,7 +707,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -740,7 +740,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -770,7 +770,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -797,7 +797,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -827,7 +827,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -860,7 +860,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -893,7 +893,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
@@ -926,7 +926,7 @@ extern "C" {
  
      if (macro_f_utf_byte_width_is(*character)) {
        if (macro_f_utf_byte_width_is(*character) > width_max) {
-        return F_status_set_error(F_failure);
+        return F_status_set_error(F_complete_not_utf);
        }
  
        if (macro_f_utf_byte_width_is(*character) == 1) {
diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h

index 099bfe3ce952fbf97ef33cff458e8013eacfd52d..69d243834a79a77ce5469fbd56926b2daccba32e 100644 (file)
--- a/level_0/f_utf/c/utf/is.h
+++ b/level_0/f_utf/c/utf/is.h
@@ -47,8 +47,9 @@ extern "C" {
   *   F_true if a UTF-8 alphabet character.
   *   F_false if not a UTF-8 alphabet character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalpha()
   */
@@ -74,8 +75,9 @@ extern "C" {
   *   F_true if a UTF-8 alpha-numeric character.
   *   F_false if not a UTF-8 alpha-numeric character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalnum()
   */
@@ -99,8 +101,9 @@ extern "C" {
   *   F_true if a UTF-8 alpha-numeric character.
   *   F_false if not a UTF-8 alpha-numeric character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalnum()
   */
@@ -122,8 +125,9 @@ extern "C" {
   *   F_true if an ASCII character.
   *   F_false if not an ASCII character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_ascii_
    extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max);
@@ -143,8 +147,9 @@ extern "C" {
   *   F_true if a UTF-8 combining character.
   *   F_false if not a UTF-8 combining character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_combining_
    extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max);
@@ -166,8 +171,9 @@ extern "C" {
   *   F_true if a UTF-8 control character.
   *   F_false if not a UTF-8 control character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see iscntrl()
   */
@@ -191,8 +197,9 @@ extern "C" {
   *   F_true if a UTF-8 control code character.
   *   F_false if not a UTF-8 control code character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_control_code_
    extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
@@ -215,8 +222,9 @@ extern "C" {
   *   F_true if a UTF-8 control format character.
   *   F_false if not a UTF-8 control format character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_control_format_
    extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
@@ -238,8 +246,9 @@ extern "C" {
   *   F_true if a UTF-8 control picture character.
   *   F_false if not a UTF-8 control picture character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_control_picture_
    extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max);
@@ -259,8 +268,9 @@ extern "C" {
   *   F_true if a UTF-8 digit character.
   *   F_false if not a UTF-8 digit character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isdigit()
   */
@@ -284,8 +294,9 @@ extern "C" {
   *   F_true if a UTF-8 emoji character.
   *   F_false if not a UTF-8 emoji character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_emoji_
    extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max);
@@ -336,10 +347,11 @@ extern "C" {
   *   F_true if a UTF-8 graph.
   *   F_false if not a UTF-8 graph.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_maybe (with error bit) if this could be a graph but width is not long enough.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isgraph()
   */
@@ -363,8 +375,9 @@ extern "C" {
   *   F_true if a UTF-8 numeric character.
   *   F_false if not a UTF-8 numeric character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isdigit()
   */
@@ -386,8 +399,9 @@ extern "C" {
   *   F_true if a UTF-8 phonetic character.
   *   F_false if not a UTF-8 phonetic character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_phonetic_
    extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max);
@@ -407,8 +421,9 @@ extern "C" {
   *   F_true if a UTF-8 private character.
   *   F_false if not a UTF-8 private character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_private_
    extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max);
@@ -430,8 +445,9 @@ extern "C" {
   *   F_true if a UTF-8 punctuation character.
   *   F_false if not a UTF-8 punctuation character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_punctuation_
    extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max);
@@ -451,8 +467,9 @@ extern "C" {
   *   F_true if a UTF-8 surrogate character.
   *   F_false if not a UTF-8 surrogate character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_surrogate_
    extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max);
@@ -474,8 +491,9 @@ extern "C" {
   *   F_true if a UTF-8 symbol character.
   *   F_false if not a UTF-8 symbol character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_symbol_
    extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max);
@@ -495,9 +513,10 @@ extern "C" {
   *   F_true if an unassigned UTF-8 character.
   *   F_false if not an unassigned UTF-8 character.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_unassigned_
    extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max);
@@ -524,9 +543,10 @@ extern "C" {
   *   F_true if a valid UTF-8 character or is an ASCII character.
   *   F_false if not a valid UTF-8 character.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_failure (with error bit) if width_max is not long enough to convert.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_valid_
    extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max);
@@ -554,10 +574,11 @@ extern "C" {
   *   F_true if a UTF-8 whitespace.
   *   F_false if not a UTF-8 whitespace.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isspace()
   */
@@ -584,10 +605,11 @@ extern "C" {
   *   F_true if a UTF-8 whitespace.
   *   F_false if not a UTF-8 whitespace.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_whitespace_modifier_
    extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max);
@@ -609,10 +631,11 @@ extern "C" {
   *   F_true if a UTF-8 whitespace.
   *   F_false if not a UTF-8 whitespace.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_whitespace_other_
    extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max);
@@ -637,10 +660,10 @@ extern "C" {
   * @return
   *   F_none on success.
   *
- *   F_failure (with error bit) if width_max is not long enough to convert.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_wide_
    extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max);
@@ -666,8 +689,9 @@ extern "C" {
   *   F_true if a UTF-8 word character.
   *   F_false if not a UTF-8 word character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalnum()
   */
@@ -700,8 +724,9 @@ extern "C" {
   *   F_true if a UTF-8 word or dash character.
   *   F_false if not a UTF-8 word or dash character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalnum()
   */
@@ -736,8 +761,9 @@ extern "C" {
   *   F_true if a UTF-8 word or dash character.
   *   F_false if not a UTF-8 word or dash character.
   *
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   *
   * @see isalnum()
   */
@@ -761,10 +787,11 @@ extern "C" {
   *   F_true if a UTF-8 whitespace.
   *   F_false if not a UTF-8 whitespace.
   *
+ *   F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
   *   F_maybe (with error bit) if this could be a whitespace but width is not long enough.
   *   F_parameter (with error bit) if a parameter is invalid.
- *   F_utf (with error bit) if unicode is an invalid Unicode character.
- *   F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment.
+ *   F_utf (with error bit) if Unicode is an invalid Unicode character.
+ *   F_utf_fragment (with error bit) if character is a UTF-8 fragment.
   */
  #ifndef _di_f_utf_is_zero_width_
    extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max);
diff --git a/level_2/fll_error/c/private-error.c b/level_2/fll_error/c/private-error.c

index cbbde446d52cca4960df498ff46ac67d72c1bf51..408213aa202ebd3178bff25ccadfbb3e0b7fb4ec 100644 (file)
--- a/level_2/fll_error/c/private-error.c
+++ b/level_2/fll_error/c/private-error.c
@@ -152,11 +152,27 @@ extern "C" {
        return F_false;
      }
  
+    if (status == F_complete_not_utf) {
+      if (print.verbosity != f_console_verbosity_quiet_e) {
+        flockfile(print.to.stream);
+
+        fl_print_format("%r%[%QInvalid UTF-8 character (truncated) found", print.to.stream, f_string_eol_s, print.context, print.prefix);
+
+        private_fll_error_print_function(print, function);
+
+        fl_print_format(".%]%r", print.to.stream, print.context, f_string_eol_s);
+
+        funlockfile(print.to.stream);
+      }
+
+      return F_false;
+    }
+
      if (status == F_utf_fragment) {
        if (print.verbosity != f_console_verbosity_quiet_e) {
          flockfile(print.to.stream);
  
-        fl_print_format("%r%[%QInvalid UTF-8 character (Fragment) found", print.to.stream, f_string_eol_s, print.context, print.prefix);
+        fl_print_format("%r%[%QInvalid UTF-8 character (fragment) found", print.to.stream, f_string_eol_s, print.context, print.prefix);
  
          private_fll_error_print_function(print, function);
  
diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c

index 754c7d746492f22a3b02ef8b3421a23165238608..30fc85628e89da19923d1e74b656ab04d071a5dc 100644 (file)
--- a/level_3/utf8/c/private-print.c
+++ b/level_3/utf8/c/private-print.c
@@ -113,6 +113,9 @@ extern "C" {
      if (F_status_set_fine(status) == F_utf) {
        fl_print_format("%[', not a valid UTF-8 character sequence.%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s);
      }
+    else if (F_status_set_fine(status) == F_complete_not_utf) {
+      fl_print_format("%[', invalid UTF-8 (truncated).%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s);
+    }
      else if (F_status_set_fine(status) == F_utf_fragment) {
        fl_print_format("%[', invalid UTF-8 fragment.%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s);
      }
diff --git a/level_3/utf8/c/private-utf8_bytecode.c b/level_3/utf8/c/private-utf8_bytecode.c

index 77271d10c2ed985b664fa265c192bd8bfc21dbdb..e5d2576b55e436312fef87f91a47b125b429d19d 100644 (file)
--- a/level_3/utf8/c/private-utf8_bytecode.c
+++ b/level_3/utf8/c/private-utf8_bytecode.c
@@ -27,7 +27,7 @@ extern "C" {
      if (F_status_is_error(status)) {
        status = F_status_set_fine(status);
  
-      if (status == F_failure || status == F_utf || status == F_utf_fragment || status == F_valid_not) {
+      if (status == F_failure || status == F_utf || status == F_complete_not_utf || status == F_utf_fragment || status == F_valid_not) {
          valid_not = F_true;
  
          utf8_print_character_invalid(data, character);
diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c

index 5b8e9273db728e6d27e43ac1fb1f4fdb01fc41ee..15579d61e96b87acf5f2c8b22b3a984de1d6ea2e 100644 (file)
--- a/level_3/utf8/c/private-utf8_codepoint.c
+++ b/level_3/utf8/c/private-utf8_codepoint.c
@@ -38,7 +38,7 @@ extern "C" {
        if (F_status_is_error(status)) {
          status = F_status_set_fine(status);
  
-        if (status == F_failure || status == F_utf || status == F_utf_fragment || status == F_valid_not) {
+        if (status == F_failure || status == F_utf || status == F_complete_not_utf || status == F_utf_fragment || status == F_valid_not) {
            valid_not = F_true;
  
            utf8_print_character_invalid(data, character);
@@ -113,7 +113,7 @@ extern "C" {
        status = f_utf_is_whitespace(character.string, 4);
  
        if (F_status_is_error(status)) {
-        if (F_status_set_fine(status) == F_utf_fragment) {
+        if (F_status_set_fine(status) == F_complete_not_utf || F_status_set_fine(status) == F_utf_fragment) {
            status = F_valid_not;
          }
          else {
@@ -129,11 +129,11 @@ extern "C" {
        }
      }
      else {
-      if (character.string[0] < 0x30 || character.string[0] > 0x39 && character.string[0] < 0x41 || character.string[0] > 0x46 && character.string[0] < 0x61 || character.string[0] > 0x66) {
+      if (character.string[0] < 0x30 || (character.string[0] > 0x39 && character.string[0] < 0x41) || (character.string[0] > 0x46 && character.string[0] < 0x61) || character.string[0] > 0x66) {
          status = f_utf_is_whitespace(character.string, 4);
  
          if (F_status_is_error(status)) {
-          if (F_status_set_fine(status) == F_utf_fragment) {
+          if (F_status_set_fine(status) == F_complete_not_utf || F_status_set_fine(status) == F_utf_fragment) {
              status = F_valid_not;
            }
            else {
diff --git a/level_3/utf8/c/utf8.c b/level_3/utf8/c/utf8.c

index 42b77778abda7adca3c45e2d7c09fe760c424ca9..4b2f1703ab565b5aeedcb3484886580c1ef37048 100644 (file)
--- a/level_3/utf8/c/utf8.c
+++ b/level_3/utf8/c/utf8.c
@@ -385,7 +385,7 @@ extern "C" {
            status = utf8_process_file_codepoint(&data, file);
          }
  
-        if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment) {
+        if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) {
            fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, f_string_empty_s, f_file_operation_process_s, fll_error_file_type_pipe_e);
          }
        }
@@ -435,7 +435,7 @@ extern "C" {
              }
            }
  
-          if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment) {
+          if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) {
              fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, data.argv[index], f_file_operation_process_s, fll_error_file_type_file_e);
  
              break;
author	Kevin Day <thekevinday@gmail.com>
	Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Wed, 11 May 2022 03:17:38 +0000 (22:17 -0500)
level_0/f_utf/c/private-utf.c		patch \| blob \| history
level_0/f_utf/c/private-utf.h		patch \| blob \| history
level_0/f_utf/c/utf/is.c		patch \| blob \| history
level_0/f_utf/c/utf/is.h		patch \| blob \| history
level_2/fll_error/c/private-error.c		patch \| blob \| history
level_3/utf8/c/private-print.c		patch \| blob \| history
level_3/utf8/c/private-utf8_bytecode.c		patch \| blob \| history
level_3/utf8/c/private-utf8_codepoint.c		patch \| blob \| history
level_3/utf8/c/utf8.c		patch \| blob \| history