From: Kevin Day Date: Wed, 11 May 2022 03:17:38 +0000 (-0500) Subject: Update: The f_utf_is_* functions should be more specific on return state for F_failure. X-Git-Tag: 0.5.10~154 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=1f593a0e49a696fe1768eb16fca610bef4cf2b8d;p=fll Update: The f_utf_is_* functions should be more specific on return state for F_failure. Rather than returning F_failure, return F_complete_not_utf. --- diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c index 5ba9cd9..300c7fb 100644 --- a/level_0/f_utf/c/private-utf.c +++ b/level_0/f_utf/c/private-utf.c @@ -19,7 +19,7 @@ extern "C" { } if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } *character_utf = macro_f_utf_char_t_from_char_1(character[0]); diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h index 379e6ad..88e1860 100644 --- a/level_0/f_utf/c/private-utf.h +++ b/level_0/f_utf/c/private-utf.h @@ -35,7 +35,7 @@ extern "C" { * @return * F_none if conversion was successful. * - * F_failure (with error bit) if width_max is not long enough to convert. + * F_complete_not_utf (with error bit) if character is an incomplete UTF-8 sequence. * F_parameter (with error bit) if a parameter is invalid. * F_utf (with error bit) if unicode is an invalid Unicode character. * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. 
diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c index 2a63837..cedb9f8 100644 --- a/level_0/f_utf/c/utf/is.c +++ b/level_0/f_utf/c/utf/is.c @@ -21,7 +21,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -54,7 +54,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -87,7 +87,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -120,7 +120,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -142,7 +142,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -172,7 +172,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -201,7 +201,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return 
F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -234,7 +234,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -264,7 +264,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -298,7 +298,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -331,7 +331,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -375,7 +375,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -421,7 +421,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -454,7 +454,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if 
(macro_f_utf_byte_width_is(*character) == 1) { @@ -484,7 +484,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -514,7 +514,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -573,7 +573,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -617,7 +617,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -647,7 +647,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -677,7 +677,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -707,7 +707,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -740,7 +740,7 @@ 
extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -770,7 +770,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -797,7 +797,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -827,7 +827,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -860,7 +860,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -893,7 +893,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { @@ -926,7 +926,7 @@ extern "C" { if (macro_f_utf_byte_width_is(*character)) { if (macro_f_utf_byte_width_is(*character) > width_max) { - return F_status_set_error(F_failure); + return F_status_set_error(F_complete_not_utf); } if (macro_f_utf_byte_width_is(*character) == 1) { diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h index 099bfe3..69d2438 
100644 --- a/level_0/f_utf/c/utf/is.h +++ b/level_0/f_utf/c/utf/is.h @@ -47,8 +47,9 @@ extern "C" { * F_true if a UTF-8 alphabet character. * F_false if not a UTF-8 alphabet character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalpha() */ @@ -74,8 +75,9 @@ extern "C" { * F_true if a UTF-8 alphabet character. * F_false if not a UTF-8 alpha-numeric character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalnum() */ @@ -99,8 +101,9 @@ extern "C" { * F_true if a UTF-8 alphabet character. * F_false if not a UTF-8 alpha-numeric character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalnum() */ @@ -122,8 +125,9 @@ extern "C" { * F_true if an ASCII character. * F_false if not an ASCII character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. 
+ * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_ascii_ extern f_status_t f_utf_is_ascii(const f_string_t character, const f_array_length_t width_max); @@ -143,8 +147,9 @@ extern "C" { * F_true if a UTF-8 combining character. * F_false if not a UTF-8 combining character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_combining_ extern f_status_t f_utf_is_combining(const f_string_t character, const f_array_length_t width_max); @@ -166,8 +171,9 @@ extern "C" { * F_true if a UTF-8 control character. * F_false if not a UTF-8 control character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see iscntrl() */ @@ -191,8 +197,9 @@ extern "C" { * F_true if a UTF-8 control code character. * F_false if not a UTF-8 control code character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. 
*/ #ifndef _di_f_utf_is_control_code_ extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max); @@ -215,8 +222,9 @@ extern "C" { * F_true if a UTF-8 control format character. * F_false if not a UTF-8 control format character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_control_format_ extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max); @@ -238,8 +246,9 @@ extern "C" { * F_true if a UTF-8 control picture character. * F_false if not a UTF-8 control picture character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_control_picture_ extern f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max); @@ -259,8 +268,9 @@ extern "C" { * F_true if a UTF-8 digit character. * F_false if not a UTF-8 digit character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. 
* * @see isdigit() */ @@ -284,8 +294,9 @@ extern "C" { * F_true if a UTF-8 emoji character. * F_false if not a UTF-8 emoji character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_emoji_ extern f_status_t f_utf_is_emoji(const f_string_t character, const f_array_length_t width_max); @@ -336,10 +347,11 @@ extern "C" { * F_true if a UTF-8 graph. * F_false if not a UTF-8 graph. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a graph but width is not long enough. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isgraph() */ @@ -363,8 +375,9 @@ extern "C" { * F_true if a UTF-8 numeric character. * F_false if not a UTF-8 numeric character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isdigit() */ @@ -386,8 +399,9 @@ extern "C" { * F_true if a UTF-8 phonetic character. * F_false if not a UTF-8 phonetic character. 
* - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_phonetic_ extern f_status_t f_utf_is_phonetic(const f_string_t character, const f_array_length_t width_max); @@ -407,8 +421,9 @@ extern "C" { * F_true if a UTF-8 punctuation character. * F_false if not a UTF-8 punctuation character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_private_ extern f_status_t f_utf_is_private(const f_string_t character, const f_array_length_t width_max); @@ -430,8 +445,9 @@ extern "C" { * F_true if a UTF-8 punctuation character. * F_false if not a UTF-8 punctuation character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_punctuation_ extern f_status_t f_utf_is_punctuation(const f_string_t character, const f_array_length_t width_max); @@ -451,8 +467,9 @@ extern "C" { * F_true if a UTF-8 symbol character. * F_false if not a UTF-8 symbol character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. 
- * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_surrogate_ extern f_status_t f_utf_is_surrogate(const f_string_t character, const f_array_length_t width_max); @@ -474,8 +491,9 @@ extern "C" { * F_true if a UTF-8 symbol character. * F_false if not a UTF-8 symbol character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_symbol_ extern f_status_t f_utf_is_symbol(const f_string_t character, const f_array_length_t width_max); @@ -495,9 +513,10 @@ extern "C" { * F_true if an unassigned UTF-8 character. * F_false if not an unassigned UTF-8 character. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_unassigned_ extern f_status_t f_utf_is_unassigned(const f_string_t character, const f_array_length_t width_max); @@ -524,9 +543,10 @@ extern "C" { * F_true if a valid UTF-8 character or is an ASCII character. * F_false if not a valid UTF-8 character.
* + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_failure (with error bit) if width_max is not long enough to convert. * F_parameter (with error bit) if a parameter is invalid. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_valid_ extern f_status_t f_utf_is_valid(const f_string_t character, const f_array_length_t width_max); @@ -554,10 +574,11 @@ extern "C" { * F_true if a UTF-8 whitespace. * F_false if not a UTF-8 whitespace. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a whitespace but width is not long enough. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isspace() */ @@ -584,10 +605,11 @@ extern "C" { * F_true if a UTF-8 whitespace. * F_false if not a UTF-8 whitespace. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a whitespace but width is not long enough. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. 
*/ #ifndef _di_f_utf_is_whitespace_modifier_ extern f_status_t f_utf_is_whitespace_modifier(const f_string_t character, const f_array_length_t width_max); @@ -609,10 +631,11 @@ extern "C" { * F_true if a UTF-8 whitespace. * F_false if not a UTF-8 whitespace. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a whitespace but width is not long enough. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_whitespace_other_ extern f_status_t f_utf_is_whitespace_other(const f_string_t character, const f_array_length_t width_max); @@ -637,10 +660,10 @@ extern "C" { * @return * F_none on success. * - * F_failure (with error bit) if width_max is not long enough to convert. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. */ #ifndef _di_f_utf_is_wide_ extern f_status_t f_utf_is_wide(const f_string_t character, const f_array_length_t width_max); @@ -666,8 +689,9 @@ extern "C" { * F_true if a UTF-8 word character. * F_false if not a UTF-8 word character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. 
+ * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalnum() */ @@ -700,8 +724,9 @@ extern "C" { * F_true if a UTF-8 word or dash character. * F_false if not a UTF-8 word or dash character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalnum() */ @@ -736,8 +761,9 @@ extern "C" { * F_true if a UTF-8 word or dash character. * F_false if not a UTF-8 word or dash character. * - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * * @see isalnum() */ @@ -761,10 +787,11 @@ extern "C" { * F_true if a UTF-8 whitespace. * F_false if not a UTF-8 whitespace. * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a whitespace but width is not long enough. * F_parameter (with error bit) if a parameter is invalid. - * F_utf (with error bit) if unicode is an invalid Unicode character. - * F_utf_fragment (with error bit) if character is an incomplete UTF-8 fragment. + * F_utf (with error bit) if Unicode is an invalid Unicode character. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. 
*/ #ifndef _di_f_utf_is_zero_width_ extern f_status_t f_utf_is_zero_width(const f_string_t character, const f_array_length_t width_max); diff --git a/level_2/fll_error/c/private-error.c b/level_2/fll_error/c/private-error.c index cbbde44..408213a 100644 --- a/level_2/fll_error/c/private-error.c +++ b/level_2/fll_error/c/private-error.c @@ -152,11 +152,27 @@ extern "C" { return F_false; } + if (status == F_complete_not_utf) { + if (print.verbosity != f_console_verbosity_quiet_e) { + flockfile(print.to.stream); + + fl_print_format("%r%[%QInvalid UTF-8 character (truncated) found", print.to.stream, f_string_eol_s, print.context, print.prefix); + + private_fll_error_print_function(print, function); + + fl_print_format(".%]%r", print.to.stream, print.context, f_string_eol_s); + + funlockfile(print.to.stream); + } + + return F_false; + } + if (status == F_utf_fragment) { if (print.verbosity != f_console_verbosity_quiet_e) { flockfile(print.to.stream); - fl_print_format("%r%[%QInvalid UTF-8 character (Fragment) found", print.to.stream, f_string_eol_s, print.context, print.prefix); + fl_print_format("%r%[%QInvalid UTF-8 character (fragment) found", print.to.stream, f_string_eol_s, print.context, print.prefix); private_fll_error_print_function(print, function); diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c index 754c7d7..30fc856 100644 --- a/level_3/utf8/c/private-print.c +++ b/level_3/utf8/c/private-print.c @@ -113,6 +113,9 @@ extern "C" { if (F_status_set_fine(status) == F_utf) { fl_print_format("%[', not a valid UTF-8 character sequence.%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s); } + else if (F_status_set_fine(status) == F_complete_not_utf) { + fl_print_format("%[', invalid UTF-8 (truncated).%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s); + } else if (F_status_set_fine(status) == F_utf_fragment) { 
fl_print_format("%[', invalid UTF-8 fragment.%]%r", data->main->error.to.stream, data->main->context.set.error, data->main->context.set.error, f_string_eol_s); } diff --git a/level_3/utf8/c/private-utf8_bytecode.c b/level_3/utf8/c/private-utf8_bytecode.c index 77271d1..e5d2576 100644 --- a/level_3/utf8/c/private-utf8_bytecode.c +++ b/level_3/utf8/c/private-utf8_bytecode.c @@ -27,7 +27,7 @@ extern "C" { if (F_status_is_error(status)) { status = F_status_set_fine(status); - if (status == F_failure || status == F_utf || status == F_utf_fragment || status == F_valid_not) { + if (status == F_failure || status == F_utf || status == F_complete_not_utf || status == F_utf_fragment || status == F_valid_not) { valid_not = F_true; utf8_print_character_invalid(data, character); diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index 5b8e927..15579d6 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -38,7 +38,7 @@ extern "C" { if (F_status_is_error(status)) { status = F_status_set_fine(status); - if (status == F_failure || status == F_utf || status == F_utf_fragment || status == F_valid_not) { + if (status == F_failure || status == F_utf || status == F_complete_not_utf || status == F_utf_fragment || status == F_valid_not) { valid_not = F_true; utf8_print_character_invalid(data, character); @@ -113,7 +113,7 @@ extern "C" { status = f_utf_is_whitespace(character.string, 4); if (F_status_is_error(status)) { - if (F_status_set_fine(status) == F_utf_fragment) { + if (F_status_set_fine(status) == F_complete_not_utf || F_status_set_fine(status) == F_utf_fragment) { status = F_valid_not; } else { @@ -129,11 +129,11 @@ extern "C" { } } else { - if (character.string[0] < 0x30 || character.string[0] > 0x39 && character.string[0] < 0x41 || character.string[0] > 0x46 && character.string[0] < 0x61 || character.string[0] > 0x66) { + if (character.string[0] < 0x30 || (character.string[0] > 0x39 && 
character.string[0] < 0x41) || (character.string[0] > 0x46 && character.string[0] < 0x61) || character.string[0] > 0x66) { status = f_utf_is_whitespace(character.string, 4); if (F_status_is_error(status)) { - if (F_status_set_fine(status) == F_utf_fragment) { + if (F_status_set_fine(status) == F_complete_not_utf || F_status_set_fine(status) == F_utf_fragment) { status = F_valid_not; } else { diff --git a/level_3/utf8/c/utf8.c b/level_3/utf8/c/utf8.c index 42b7777..4b2f170 100644 --- a/level_3/utf8/c/utf8.c +++ b/level_3/utf8/c/utf8.c @@ -385,7 +385,7 @@ extern "C" { status = utf8_process_file_codepoint(&data, file); } - if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment) { + if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) { fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, f_string_empty_s, f_file_operation_process_s, fll_error_file_type_pipe_e); } } @@ -435,7 +435,7 @@ extern "C" { } } - if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment) { + if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) { fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, data.argv[index], f_file_operation_process_s, fll_error_file_type_file_e); break;