From e0b16cff04fd1f1234d0e26a8ca50f0e256de99e Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Sat, 25 Jun 2022 10:49:30 -0500 Subject: [PATCH] Update: White space function changes. Make the "is white space" functions accept a "strict" parameter to be more consistent with how other functions operate. For the next development release I want to consider separate functions to avoid passing a boolean as a parameter to do this (for performance reasons). This changes behavior in some cases and if I did something wrong then there will be a regression. Look out for white space regressions specifically in the FSS programs. --- level_0/f_fss/c/fss.c | 4 +-- level_0/f_utf/c/private-utf_alphabetic.c | 6 ++-- level_0/f_utf/c/private-utf_whitespace.c | 26 ++++++++++++++- level_0/f_utf/c/private-utf_whitespace.h | 29 ++++++++++++++++- level_0/f_utf/c/utf/is.c | 36 +++++++++++++++++++-- level_0/f_utf/c/utf/is.h | 39 +++++++++++++++++++--- level_0/f_utf/c/utf/is_character.c | 22 +++++++++++-- level_0/f_utf/c/utf/is_character.h | 30 ++++++++++++++--- level_1/fl_conversion/c/private-conversion.c | 4 +-- level_1/fl_print/c/private-print.c | 48 ++++++++++++++-------------- level_1/fl_string/c/private-string.c | 22 ++++++------- level_1/fl_string/c/string.c | 8 ++--- level_1/fl_utf/c/private-utf.c | 14 ++++---- level_1/fl_utf/c/utf.c | 2 +- level_2/fll_fss/c/fss.c | 2 +- level_3/byte_dump/c/private-byte_dump.c | 2 +- level_3/utf8/c/private-utf8_codepoint.c | 4 +-- 17 files changed, 222 insertions(+), 76 deletions(-) diff --git a/level_0/f_fss/c/fss.c b/level_0/f_fss/c/fss.c index 169acfe..8f7f03b 100644 --- a/level_0/f_fss/c/fss.c +++ b/level_0/f_fss/c/fss.c @@ -168,7 +168,7 @@ extern "C" { return status; } - status = f_fss_fail_utf_to_false(state, f_utf_is_whitespace(buffer.string + range.start, width_max)); + status = f_fss_fail_utf_to_false(state, f_utf_is_whitespace(buffer.string + range.start, width_max, F_false)); if (status == F_false) { status = f_fss_fail_utf_to_false(state, 
f_utf_is_control(buffer.string + range.start, width_max)); @@ -272,7 +272,7 @@ extern "C" { continue; } - status = f_fss_fail_utf_to_false(state, f_utf_is_whitespace(buffer.string + range->start, width_max)); + status = f_fss_fail_utf_to_false(state, f_utf_is_whitespace(buffer.string + range->start, width_max, F_false)); if (status == F_false) { status = f_fss_fail_utf_to_false(state, f_utf_is_control(buffer.string + range->start, width_max)); diff --git a/level_0/f_utf/c/private-utf_alphabetic.c b/level_0/f_utf/c/private-utf_alphabetic.c index 5278f3e..c450433 100644 --- a/level_0/f_utf/c/private-utf_alphabetic.c +++ b/level_0/f_utf/c/private-utf_alphabetic.c @@ -44,7 +44,7 @@ extern "C" { return F_false; } - if (private_f_utf_character_is_whitespace(sequence)) { + if (private_f_utf_character_is_whitespace(sequence, F_true)) { return F_false; } @@ -100,7 +100,7 @@ extern "C" { return F_false; } - if (private_f_utf_character_is_whitespace(sequence)) { + if (private_f_utf_character_is_whitespace(sequence, F_true)) { return F_false; } @@ -156,7 +156,7 @@ extern "C" { return F_false; } - if (private_f_utf_character_is_whitespace(sequence)) { + if (private_f_utf_character_is_whitespace(sequence, F_true)) { return F_false; } diff --git a/level_0/f_utf/c/private-utf_whitespace.c b/level_0/f_utf/c/private-utf_whitespace.c index 0cd621d..6845e22 100644 --- a/level_0/f_utf/c/private-utf_whitespace.c +++ b/level_0/f_utf/c/private-utf_whitespace.c @@ -7,7 +7,7 @@ extern "C" { #endif #if !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_) - f_status_t private_f_utf_character_is_whitespace(const f_utf_char_t sequence) { + f_status_t private_f_utf_character_is_whitespace(const f_utf_char_t sequence, const bool strict) { if (macro_f_utf_char_t_width_is(sequence) == 2) { @@ -21,6 +21,18 @@ extern "C" { if (macro_f_utf_char_t_width_is(sequence) == 3) { + if (macro_f_utf_char_t_to_char_1(sequence) == 0xe1) { + if (strict) { + + // Ogham: U+1680 (isn't 
whitespace but is technically considered one: ( )). + if (sequence == 0xe19a8000) { + return F_true; + } + } + + return F_false; + } + if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) { // General Punctuation: U+2000 to U+200A. @@ -75,6 +87,18 @@ extern "C" { } #endif // !defined(_di_f_utf_character_is_whitespace_other_) || !defined(_di_f_utf_is_whitespace_other_) +#if !defined(_di_f_utf_character_is_whitespace_zero_width_) || !defined(_di_f_utf_is_whitespace_zero_width_) + f_status_t private_f_utf_character_is_whitespace_zero_width(const f_utf_char_t sequence) { + + // General Punctuation: U+200B (isn't whitespace but is intentended to be interpreted as one in certain circumstances). + if (sequence == 0xe2808b00) { + return F_true; + } + + return F_false; + } +#endif // !defined(_di_f_utf_character_is_whitespace_zero_width_) || !defined(_di_f_utf_is_whitespace_zero_width_) + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_0/f_utf/c/private-utf_whitespace.h b/level_0/f_utf/c/private-utf_whitespace.h index 8420c6b..4d1acc3 100644 --- a/level_0/f_utf/c/private-utf_whitespace.h +++ b/level_0/f_utf/c/private-utf_whitespace.h @@ -26,6 +26,9 @@ extern "C" { * * @param sequence * The byte sequence to validate as a character. + * @param strict + * When TRUE, include all appropriate characters by type as per Unicode. + * When FALSE, non-white space characters that are treated as white space by Unicode are not treated as white space. * * @return * F_true if a UTF-8 white space. 
@@ -38,7 +41,7 @@ extern "C" { * @see f_utf_is_whitespace() */ #if !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_) - extern f_status_t private_f_utf_character_is_whitespace(const f_utf_char_t sequence) F_attribute_visibility_internal_d; + extern f_status_t private_f_utf_character_is_whitespace(const f_utf_char_t sequence, const bool strict) F_attribute_visibility_internal_d; #endif // !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_) /** @@ -89,6 +92,30 @@ extern "C" { extern f_status_t private_f_utf_character_is_whitespace_other(const f_utf_char_t sequence) F_attribute_visibility_internal_d; #endif // !defined(_di_f_utf_character_is_whitespace_other_) || !defined(_di_f_utf_is_whitespace_other_) +/** + * Private implementation of f_utf_character_is_whitespace_zero_width(). + * + * Intended to be shared to each of the different implementation variations. + * + * This expects the character width to be of at least size 2. + * + * @param sequence + * The byte sequence to validate as a character. + * + * @return + * F_true if a UTF-8 zero-width white space. + * F_false if not a UTF-8 zero-width white space. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. 
+ * + * @see f_utf_character_is_whitespace_zero_width() + * @see f_utf_is_whitespace_zero_width() + */ +#if !defined(_di_f_utf_character_is_whitespace_zero_width_) || !defined(_di_f_utf_is_whitespace_zero_width_) + extern f_status_t private_f_utf_character_is_whitespace_zero_width(const f_utf_char_t sequence) F_attribute_visibility_internal_d; +#endif // !defined(_di_f_utf_character_is_whitespace_zero_width_) || !defined(_di_f_utf_is_whitespace_zero_width_) + #ifdef __cplusplus } // extern "C" #endif diff --git a/level_0/f_utf/c/utf/is.c b/level_0/f_utf/c/utf/is.c index f25f81a..e85c2db 100644 --- a/level_0/f_utf/c/utf/is.c +++ b/level_0/f_utf/c/utf/is.c @@ -396,7 +396,7 @@ extern "C" { return F_false; } - if (private_f_utf_character_is_whitespace(utf)) { + if (private_f_utf_character_is_whitespace(utf, F_true)) { return F_false; } @@ -727,7 +727,7 @@ extern "C" { #endif // _di_f_utf_is_valid_ #ifndef _di_f_utf_is_whitespace_ - f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max) { + f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max, const bool strict) { #ifndef _di_level_0_parameter_checking_ if (width_max < 1) return F_status_set_error(F_parameter); #endif // _di_level_0_parameter_checking_ @@ -748,7 +748,7 @@ extern "C" { if (F_status_is_error(status)) return status; } - return private_f_utf_character_is_whitespace(utf); + return private_f_utf_character_is_whitespace(utf, strict); } if (isspace(*sequence)) return F_true; @@ -817,6 +817,36 @@ extern "C" { } #endif // _di_f_utf_is_whitespace_other_ +#ifndef _di_f_utf_is_whitespace_zero_width_ + f_status_t f_utf_is_whitespace_zero_width(const f_string_t sequence, const f_array_length_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return F_status_set_error(F_parameter); + #endif // _di_level_0_parameter_checking_ + + if (macro_f_utf_byte_width_is(*sequence)) { + if (macro_f_utf_byte_width_is(*sequence) > 
width_max) { + return F_status_set_error(F_complete_not_utf); + } + + if (macro_f_utf_byte_width_is(*sequence) == 1) { + return F_status_set_error(F_utf_fragment); + } + + f_utf_char_t utf = 0; + + { + const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf); + if (F_status_is_error(status)) return status; + } + + return private_f_utf_character_is_whitespace_zero_width(utf); + } + + // There are no ASCII whitespace zero-width. + return F_false; + } +#endif // _di_f_utf_is_whitespace_zero_width_ + #ifndef _di_f_utf_is_wide_ f_status_t f_utf_is_wide(const f_string_t sequence, const f_array_length_t width_max) { diff --git a/level_0/f_utf/c/utf/is.h b/level_0/f_utf/c/utf/is.h index fa5339e..917b993 100644 --- a/level_0/f_utf/c/utf/is.h +++ b/level_0/f_utf/c/utf/is.h @@ -606,6 +606,9 @@ extern "C" { * @param width_max * The maximum width available for checking. * Can be anything greater than 0. + * @param strict + * When TRUE, include all appropriate characters by type as per Unicode. + * When FALSE, non-white space characters that are treated as white space by Unicode are not treated as white space. * * @return * F_true if a UTF-8 white space. @@ -620,7 +623,7 @@ extern "C" { * @see isspace() */ #ifndef _di_f_utf_is_whitespace_ - extern f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max); + extern f_status_t f_utf_is_whitespace(const f_string_t sequence, const f_array_length_t width_max, const bool strict); #endif // _di_f_utf_is_whitespace_ /** @@ -639,8 +642,8 @@ extern "C" { * Can be anything greater than 0. * * @return - * F_true if a UTF-8 white space. - * F_false if not a UTF-8 white space. + * F_true if a UTF-8 (modifier) white space. + * F_false if not a UTF-8 (modifier) white space. * * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a white space but width is not long enough. 
@@ -665,8 +668,8 @@ extern "C" { * Can be anything greater than 0. * * @return - * F_true if a UTF-8 white space. - * F_false if not a UTF-8 white space. + * F_true if a UTF-8 (other) white space. + * F_false if not a UTF-8 (other) white space. * * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. * F_maybe (with error bit) if this could be a white space but width is not long enough. @@ -679,6 +682,32 @@ extern "C" { #endif // _di_f_utf_is_whitespace_other_ /** + * Check to see if the entire byte block of the character is a zero-width type of UTF-8 space character. + * + * This covers white space characters that are actually zero-width spaces (which are not true spaces), such as Zero-Width Space (U+200B). + * + * @param sequence + * The byte sequence to validate as a character. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * F_true if a UTF-8 (zero-width) white space. + * F_false if not a UTF-8 (zero-width) white space. + * + * F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence. + * F_maybe (with error bit) if this could be a white space but width is not long enough. + * F_parameter (with error bit) if a parameter is invalid. + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if Unicode is an invalid Unicode character. + */ +#ifndef _di_f_utf_is_whitespace_zero_width_ + extern f_status_t f_utf_is_whitespace_zero_width(const f_string_t sequence, const f_array_length_t width_max); +#endif // _di_f_utf_is_whitespace_zero_width_ + +/** * Get whether or not the UTF-8 character is a wide character on display. * * This is not the wide as in width in bytes that the codepoint takes up in UTF-8. 
diff --git a/level_0/f_utf/c/utf/is_character.c b/level_0/f_utf/c/utf/is_character.c index 2a58183..30b958a 100644 --- a/level_0/f_utf/c/utf/is_character.c +++ b/level_0/f_utf/c/utf/is_character.c @@ -239,7 +239,7 @@ extern "C" { return F_false; } - if (private_f_utf_character_is_whitespace(sequence)) { + if (private_f_utf_character_is_whitespace(sequence, F_true)) { return F_false; } @@ -438,14 +438,14 @@ extern "C" { #endif // _di_f_utf_character_is_valid_ #ifndef _di_f_utf_character_is_whitespace_ - f_status_t f_utf_character_is_whitespace(const f_utf_char_t sequence) { + f_status_t f_utf_character_is_whitespace(const f_utf_char_t sequence, const bool strict) { if (macro_f_utf_char_t_width_is(sequence)) { if (macro_f_utf_char_t_width_is(sequence) == 1) { return F_status_set_error(F_utf_fragment); } - return private_f_utf_character_is_whitespace(sequence); + return private_f_utf_character_is_whitespace(sequence, strict); } if (isspace(macro_f_utf_char_t_to_char_1(sequence))) { @@ -488,6 +488,22 @@ extern "C" { } #endif // _di_f_utf_character_is_whitespace_other_ +#ifndef _di_f_utf_character_is_whitespace_zero_width_ + f_status_t f_utf_character_is_whitespace_zero_width(const f_utf_char_t sequence) { + + if (macro_f_utf_char_t_width_is(sequence)) { + if (macro_f_utf_char_t_width_is(sequence) == 1) { + return F_status_set_error(F_utf_fragment); + } + + return private_f_utf_character_is_whitespace_zero_width(sequence); + } + + // There are no ASCII whitespace zero-width. + return F_false; + } +#endif // _di_f_utf_character_is_whitespace_zero_width_ + #ifndef _di_f_utf_character_is_wide_ f_status_t f_utf_character_is_wide(const f_utf_char_t sequence) { diff --git a/level_0/f_utf/c/utf/is_character.h b/level_0/f_utf/c/utf/is_character.h index 56aa23c..4010a8e 100644 --- a/level_0/f_utf/c/utf/is_character.h +++ b/level_0/f_utf/c/utf/is_character.h @@ -518,6 +518,9 @@ extern "C" { * * @param sequence * The byte sequence to validate as a character. 
+ * @param strict + * When TRUE, include all appropriate characters by type as per Unicode. + * When FALSE, non-white space characters that are treated as white space by Unicode are not treated as white space. * * @return * F_true if a UTF-8 white space. @@ -529,7 +532,7 @@ extern "C" { * @see isspace() */ #ifndef _di_f_utf_character_is_whitespace_ - extern f_status_t f_utf_character_is_whitespace(const f_utf_char_t sequence); + extern f_status_t f_utf_character_is_whitespace(const f_utf_char_t sequence, const bool strict); #endif // _di_f_utf_character_is_whitespace_ /** @@ -544,8 +547,8 @@ extern "C" { * The byte sequence to validate as a character. * * @return - * F_true if a UTF-8 modifier character. - * F_false if not a UTF-8 modifier character. + * F_true if a UTF-8 (modifier) white space character. + * F_false if not a UTF-8 (modifier) white space character. * * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * F_utf_not (with error bit) if unicode is an invalid Unicode character. @@ -568,14 +571,31 @@ extern "C" { * * F_utf_fragment (with error bit) if character is a UTF-8 fragment. * F_utf_not (with error bit) if unicode is an invalid Unicode character. - * - * @see isspace() */ #ifndef _di_f_utf_character_is_whitespace_other_ extern f_status_t f_utf_character_is_whitespace_other(const f_utf_char_t sequence); #endif // _di_f_utf_character_is_whitespace_other_ /** + * Check to see if the entire byte block of the character is a zero-width type of UTF-8 space character. + * + * This covers white space characters that are actually zero-width spaces (which are not true spaces), such as Zero-Width Space (U+200B). + * + * @param sequence + * The byte sequence to validate as a character. + * + * @return + * F_true if a UTF-8 (zero-width) white space. + * F_false if not a UTF-8 (zero-width) white space. + * + * F_utf_fragment (with error bit) if character is a UTF-8 fragment. + * F_utf_not (with error bit) if unicode is an invalid Unicode character. 
+ */ +#ifndef _di_f_utf_character_is_whitespace_zero_width_ + extern f_status_t f_utf_character_is_whitespace_zero_width(const f_utf_char_t sequence); +#endif // _di_f_utf_character_is_whitespace_zero_width_ + +/** * Get whether or not the UTF-8 character is a wide character on display. * * This is not the wide as in width in bytes that the codepoint takes up in UTF-8. diff --git a/level_1/fl_conversion/c/private-conversion.c b/level_1/fl_conversion/c/private-conversion.c index 7fa6642..5d8439f 100644 --- a/level_1/fl_conversion/c/private-conversion.c +++ b/level_1/fl_conversion/c/private-conversion.c @@ -344,7 +344,7 @@ extern "C" { if (!mode && !vector) { width_max = length - i; - status = f_utf_is_whitespace(string + i, width_max); + status = f_utf_is_whitespace(string + i, width_max, F_false); if (status == F_true) { offset = i + 1; @@ -501,7 +501,7 @@ extern "C" { if (!mode) { width_max = length - i; - status = f_utf_is_whitespace(string + i, width_max); + status = f_utf_is_whitespace(string + i, width_max, F_false); if (status == F_true) { offset = i + 1; diff --git a/level_1/fl_print/c/private-print.c b/level_1/fl_print/c/private-print.c index 29dfce5..7b82dcc 100644 --- a/level_1/fl_print/c/private-print.c +++ b/level_1/fl_print/c/private-print.c @@ -1236,7 +1236,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -1293,7 +1293,7 @@ extern "C" { } } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -1338,7 +1338,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, stop - j); + status = f_utf_is_whitespace(string + j, stop - j, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ 
-1505,7 +1505,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); if (F_status_is_error(status)) break; if (status == F_false) { @@ -1548,7 +1548,7 @@ extern "C" { } } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. if (status == F_true || !string[i]) { @@ -1585,7 +1585,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, stop - j); + status = f_utf_is_whitespace(string + j, stop - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; @@ -1711,7 +1711,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); if (F_status_is_error(status)) break; if (status == F_false) { @@ -1753,7 +1753,7 @@ extern "C" { } } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. if (status == F_true || !string[i]) { @@ -1789,7 +1789,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, stop - j); + status = f_utf_is_whitespace(string + j, stop - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; @@ -1972,7 +1972,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); if (F_status_is_error(status)) break; if (status == F_false) { @@ -2015,7 +2015,7 @@ extern "C" { } } - status = f_utf_is_whitespace(string + i, stop - i); + status = f_utf_is_whitespace(string + i, stop - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. 
if (status == F_true || !string[i]) { @@ -2052,7 +2052,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, stop - j); + status = f_utf_is_whitespace(string + j, stop - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; @@ -2206,7 +2206,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -2240,7 +2240,7 @@ extern "C" { while (i < length) { - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -2264,7 +2264,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, length - j); + status = f_utf_is_whitespace(string + j, length - j, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -2386,7 +2386,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); // Consider invalid data not-white space. if (F_status_is_error(status)) break; @@ -2408,7 +2408,7 @@ extern "C" { while (i < length) { - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. 
if (status == F_true || !string[i]) { @@ -2424,7 +2424,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, length - j); + status = f_utf_is_whitespace(string + j, length - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; @@ -2505,7 +2505,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); // Consider invalid data not-white space. if (F_status_is_error(status)) break; @@ -2527,7 +2527,7 @@ extern "C" { while (i < length) { - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. if (status == F_true || !string[i]) { @@ -2543,7 +2543,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, length - j); + status = f_utf_is_whitespace(string + j, length - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; @@ -2679,7 +2679,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); if (F_status_is_error(status) || status == F_false) break; if (status == F_false) { @@ -2699,7 +2699,7 @@ extern "C" { while (i < length) { - status = f_utf_is_whitespace(string + i, length - i); + status = f_utf_is_whitespace(string + i, length - i, F_false); // Determine if this is an end of string white space that needs to be trimmed. 
if (status == F_true || !string[i]) { @@ -2715,7 +2715,7 @@ extern "C" { continue; } - status = f_utf_is_whitespace(string + j, length - j); + status = f_utf_is_whitespace(string + j, length - j, F_false); if (F_status_is_error(status)) break; if (status == F_false && string[j]) break; diff --git a/level_1/fl_string/c/private-string.c b/level_1/fl_string/c/private-string.c index acaaced..fdc2889 100644 --- a/level_1/fl_string/c/private-string.c +++ b/level_1/fl_string/c/private-string.c @@ -139,7 +139,7 @@ extern "C" { width_max = (stop1 - i1) + 1; - status = f_utf_is_whitespace(string1 + i1, width_max); + status = f_utf_is_whitespace(string1 + i1, width_max, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -187,7 +187,7 @@ extern "C" { width_max = (stop2 - i2) + 1; - status = f_utf_is_whitespace(string2 + i2, width_max); + status = f_utf_is_whitespace(string2 + i2, width_max, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -247,7 +247,7 @@ extern "C" { width_max = (stop1 - j) + 1; - status = f_utf_is_whitespace(string1 + j, width_max); + status = f_utf_is_whitespace(string1 + j, width_max, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -304,7 +304,7 @@ extern "C" { width_max = (stop2 - j) + 1; - status = f_utf_is_whitespace(string2 + j, width_max); + status = f_utf_is_whitespace(string2 + j, width_max, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -407,7 +407,7 @@ extern "C" { while (i1 < stop1 && !string1[i1]) ++i1; if (i1 == stop1) break; - status = f_utf_is_whitespace(string1 + i1, (stop1 - i1) + 1); + status = f_utf_is_whitespace(string1 + i1, (stop1 - i1) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -442,7 +442,7 @@ extern "C" { while (i2 < stop2 && !string2[i2]) ++i2; if (i2 
== stop2) break; - status = f_utf_is_whitespace(string2 + i2, (stop2 - i2) + 1); + status = f_utf_is_whitespace(string2 + i2, (stop2 - i2) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -487,7 +487,7 @@ extern "C" { while (j < stop1 && !string1[j]) ++j; if (j == stop1) break; - status = f_utf_is_whitespace(string1 + j, (stop1 - j) + 1); + status = f_utf_is_whitespace(string1 + j, (stop1 - j) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -534,7 +534,7 @@ extern "C" { while (j < stop2 && !string2[j]) ++j; if (j == stop2) break; - status = f_utf_is_whitespace(string2 + j, (stop2 - j) + 1); + status = f_utf_is_whitespace(string2 + j, (stop2 - j) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -619,7 +619,7 @@ extern "C" { while (*start < *stop && !string[*start]) ++(*start); if (*start > *stop) break; - status = f_utf_is_whitespace(string + *start, (*stop - *start) + 1); + status = f_utf_is_whitespace(string + *start, (*stop - *start) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -671,7 +671,7 @@ extern "C" { if (*stop == *start) break; - status = f_utf_is_whitespace(string + *stop, (stop_original - *stop) + 1); + status = f_utf_is_whitespace(string + *stop, (stop_original - *stop) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; @@ -699,7 +699,7 @@ extern "C" { } // for if (*stop == *start) { - status = f_utf_is_whitespace(string + *stop, (stop_original - *stop) + 1); + status = f_utf_is_whitespace(string + *stop, (stop_original - *stop) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_parameter) return status; diff --git a/level_1/fl_string/c/string.c b/level_1/fl_string/c/string.c index df2ed08..76d16a7 100644 --- 
a/level_1/fl_string/c/string.c +++ b/level_1/fl_string/c/string.c @@ -449,7 +449,7 @@ extern "C" { f_array_length_t width_max = (range->stop - range->start) + 1; - while (buffer.string[range->start] == placeholder || (status = f_utf_is_whitespace(buffer.string + range->start, width_max)) == F_false) { + while (buffer.string[range->start] == placeholder || (status = f_utf_is_whitespace(buffer.string + range->start, width_max, F_false)) == F_false) { if (F_status_is_error(status)) return status; @@ -598,7 +598,7 @@ extern "C" { for (; range->start <= range->stop; ) { - status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1); + status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -757,7 +757,7 @@ extern "C" { // The end of line, whitespace, or range stop point are the only valid stop points. if (range->start <= range->stop) { - status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1); + status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1, F_false); if (F_status_is_error(status)) { if (id) { @@ -992,7 +992,7 @@ extern "C" { f_array_length_t width_max = (range->stop - range->start) + 1; - while (string[range->start] == placeholder || (status = f_utf_is_whitespace(string + range->start, width_max)) == F_false) { + while (string[range->start] == placeholder || (status = f_utf_is_whitespace(string + range->start, width_max, F_false)) == F_false) { if (F_status_is_error(status)) return status; diff --git a/level_1/fl_utf/c/private-utf.c b/level_1/fl_utf/c/private-utf.c index 81a6bdd..fa15a5e 100644 --- a/level_1/fl_utf/c/private-utf.c +++ b/level_1/fl_utf/c/private-utf.c @@ -52,7 +52,7 @@ extern "C" { while (i1 < stop1 && !string1[i1]) ++i1; if (i1 == stop1) break; - status = f_utf_character_is_whitespace(string1[i1]); + status = 
f_utf_character_is_whitespace(string1[i1], F_false); if (F_status_is_error(status)) { @@ -72,7 +72,7 @@ extern "C" { while (i2 < stop2 && !string2[i2]) ++i2; if (i2 == stop2) break; - status = f_utf_character_is_whitespace(string2[i2]); + status = f_utf_character_is_whitespace(string2[i2], F_false); if (F_status_is_error(status)) { @@ -101,7 +101,7 @@ extern "C" { while (j < stop1 && !string1[j]) ++j; if (j == stop1) break; - status = f_utf_character_is_whitespace(string1[j]); + status = f_utf_character_is_whitespace(string1[j], F_false); if (F_status_is_error(status)) { // ignore possibly invalid UTF-8 codes. @@ -123,7 +123,7 @@ extern "C" { while (j < stop2 && !string2[j]) ++j; if (j == stop2) break; - status = f_utf_character_is_whitespace(string2[j]); + status = f_utf_character_is_whitespace(string2[j], F_false); if (F_status_is_error(status)) { @@ -182,7 +182,7 @@ extern "C" { while (*start < *stop && !source[*start]) ++(*start); if (*start > *stop) break; - status = f_utf_character_is_whitespace(source[*start]); + status = f_utf_character_is_whitespace(source[*start], F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -203,7 +203,7 @@ extern "C" { if (!source[*stop]) continue; if (*stop == *start) break; - status = f_utf_character_is_whitespace(source[*stop]); + status = f_utf_character_is_whitespace(source[*stop], F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { @@ -217,7 +217,7 @@ extern "C" { } // for if (*stop == *start) { - status = f_utf_character_is_whitespace(source[*stop]); + status = f_utf_character_is_whitespace(source[*stop], F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { diff --git a/level_1/fl_utf/c/utf.c b/level_1/fl_utf/c/utf.c index cc1fd4b..364c74d 100644 --- a/level_1/fl_utf/c/utf.c +++ b/level_1/fl_utf/c/utf.c @@ -183,7 +183,7 @@ extern "C" { return F_status_set_error(F_utf_fragment); } - while (buffer.string[range->start] 
== placeholder || (status = f_utf_character_is_whitespace(buffer.string[range->start])) == F_false) { + while (buffer.string[range->start] == placeholder || (status = f_utf_character_is_whitespace(buffer.string[range->start], F_false)) == F_false) { if (F_status_is_error(status)) return status; if (buffer.string[range->start] == f_utf_char_t_eol_s) return F_none_eol; diff --git a/level_2/fll_fss/c/fss.c b/level_2/fll_fss/c/fss.c index b5fa053..d2f2b1f 100644 --- a/level_2/fll_fss/c/fss.c +++ b/level_2/fll_fss/c/fss.c @@ -76,7 +76,7 @@ extern "C" { return F_found_not; } - f_status_t status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1); + f_status_t status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1, F_false); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_maybe) { diff --git a/level_3/byte_dump/c/private-byte_dump.c b/level_3/byte_dump/c/private-byte_dump.c index 0c6ea3b..2d94f96 100644 --- a/level_3/byte_dump/c/private-byte_dump.c +++ b/level_3/byte_dump/c/private-byte_dump.c @@ -795,7 +795,7 @@ extern "C" { } } } - else if (f_utf_character_is_whitespace(sequence.string[i]) == F_true) { + else if (f_utf_character_is_whitespace(sequence.string[i], F_true) == F_true) { if (data->main->parameters.array[byte_dump_parameter_classic_e].result == f_console_result_found_e) { f_print_dynamic_raw(f_string_ascii_period_s, data->main->output.to.stream); } diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index ad09c7f..48adae3 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -203,7 +203,7 @@ extern "C" { status = F_space; } else if (macro_f_utf_byte_width_is(*unicode.string)) { - status = f_utf_is_whitespace(unicode.string, 4); + status = f_utf_is_whitespace(unicode.string, 4, F_true); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_complete_not_utf || 
F_status_set_fine(status) == F_utf_fragment) { @@ -223,7 +223,7 @@ extern "C" { } else { if (unicode.string[0] < 0x30 || unicode.string[0] > (0x39 && unicode.string[0] < 0x41) || (unicode.string[0] > 0x46 && unicode.string[0] < 0x61) || unicode.string[0] > 0x66) { - status = f_utf_is_whitespace(unicode.string, 4); + status = f_utf_is_whitespace(unicode.string, 4, F_true); if (F_status_is_error(status)) { if (F_status_set_fine(status) == F_complete_not_utf || F_status_set_fine(status) == F_utf_fragment) { -- 1.8.3.1