From 2841ab5748320c021403425355ff293590c56d37 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Wed, 21 Aug 2019 19:04:32 -0500 Subject: [PATCH] Update: use max_width instead of maxWidth, remove f_utf_bom check from f_utf_is_space, and provide f_macro_utf_byte_width_is for UTF-8 Clean up accidental use of maxWidth; the proper naming style is max_width. After further review, I have decided to move the f_utf_bom check back outside of the f_utf_is_space() function. Provide the f_macro_utf_byte_width_is() macro for getting UTF-8 character widths but returning 0 if the character is ASCII. --- level_0/f_utf/c/utf.c | 28 ++++++++++++---------------- level_0/f_utf/c/utf.h | 33 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c index 85a803f..ab589b3 100644 --- a/level_0/f_utf/c/utf.c +++ b/level_0/f_utf/c/utf.c @@ -5,9 +5,9 @@ extern "C" { #endif #ifndef _di_f_utf_is_bom_ - f_return_status f_utf_is_bom(const f_string character, const f_u_short maxWidth) { + f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ - if (maxWidth < 1) return f_error_set_error(f_invalid_parameter); + if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ f_u_short width = f_macro_utf_byte_width(*character); @@ -16,7 +16,7 @@ extern "C" { return f_false; } - if (width > maxWidth) { + if (width > max_width) { return f_error_set_error(f_maybe); } @@ -31,9 +31,9 @@ extern "C" { #endif // _di_f_utf_is_bom_ #ifndef _di_f_utf_is_space_ - f_return_status f_utf_is_space(const f_string character, const f_u_short maxWidth) { + f_return_status f_utf_is_space(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ - if (maxWidth < 1) return f_error_set_error(f_invalid_parameter); + if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif //
_di_level_0_parameter_checking_ f_u_short width = f_macro_utf_byte_width(*character); @@ -42,7 +42,7 @@ extern "C" { return f_false; } - if (width > maxWidth) { + if (width > max_width) { return f_error_set_error(f_maybe); } @@ -155,10 +155,6 @@ extern "C" { return f_true; } - if (!memcmp(character, f_utf_bom, width)) { - return f_true; - } - return f_false; } @@ -167,9 +163,9 @@ extern "C" { #endif // _di_f_utf_is_space_ #ifndef _di_f_utf_is_substitute_ - f_return_status f_utf_is_substitute(const f_string character, const f_u_short maxWidth) { + f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ - if (maxWidth < 1) return f_error_set_error(f_invalid_parameter); + if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ f_u_short width = f_macro_utf_byte_width(*character); @@ -178,7 +174,7 @@ extern "C" { return f_false; } - if (width > maxWidth) { + if (width > max_width) { return f_error_set_error(f_maybe); } @@ -215,9 +211,9 @@ extern "C" { #endif // _di_f_utf_is_substitute_ #ifndef _di_f_utf_is_whitespace_ - f_return_status f_utf_is_whitespace(const f_string character, const f_u_short maxWidth) { + f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ - if (maxWidth < 1) return f_error_set_error(f_invalid_parameter); + if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ f_u_short width = f_macro_utf_byte_width(*character); @@ -226,7 +222,7 @@ extern "C" { return f_false; } - if (width > maxWidth) { + if (width > max_width) { return f_error_set_error(f_maybe); } diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h index aee00cc..53498de 100644 --- a/level_0/f_utf/c/utf.h +++ b/level_0/f_utf/c/utf.h @@ -72,6 +72,8 @@ extern "C" { * The f_macro_utf_byte_is_* macros are used to determine a width of 
the character (either 1, 2, 3, or 4, respectively). * * The f_macro_utf_byte_width macro determines a width of the character. + * + * The f_macro_utf_byte_width_is is identical to f_macro_utf_byte_width, except it returns 0 when character is not UTF-8. */ #ifndef _di_f_utf_byte_ #define f_utf_byte_1 0x80 // 1000 0000 @@ -91,7 +93,8 @@ extern "C" { #define f_macro_utf_byte_is_3(character) ((character & f_utf_byte_off_3) == f_utf_byte_3) // (1110 xxxx & 1111 0000) == 1110 0000 #define f_macro_utf_byte_is_4(character) ((character & f_utf_byte_off_4) == f_utf_byte_4) // (1111 0xxx & 1111 1000) == 1111 0000 - #define f_macro_utf_byte_width(character) (!f_macro_utf_byte_is(character) || f_macro_utf_byte_is_1(character)) ? 1 : (f_macro_utf_byte_is_2(character) ? 2 : (f_macro_utf_byte_is_3(character) ? 3 : 4)) + #define f_macro_utf_byte_width(character) (!f_macro_utf_byte_is(character) || f_macro_utf_byte_is_1(character)) ? 1 : (f_macro_utf_byte_is_2(character) ? 2 : (f_macro_utf_byte_is_3(character) ? 3 : 4)) + #define f_macro_utf_byte_width_is(character) (f_macro_utf_byte_is(character) ? (f_macro_utf_byte_is_1(character) ? 1 : (f_macro_utf_byte_is_2(character) ? 2 : (f_macro_utf_byte_is_3(character) ? 3 : 4))) : 0) #endif // _di_f_utf_byte_ /** @@ -187,8 +190,8 @@ extern "C" { * * @param character * The character to validate. - * There must be enough space allocated to compare against, as limited by maxWidth. - * @param maxWidth + * There must be enough space allocated to compare against, as limited by max_width. + * @param max_width * The maximum width available for checking. * Can be anything greater than 0. * @@ -199,31 +202,29 @@ extern "C" { * f_invalid_parameter (with error bit) if a parameter is invalid. 
*/ #ifndef _di_f_utf_is_bom_ - extern f_return_status f_utf_is_bom(const f_string character, const f_u_short maxWidth); + extern f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width); #endif // _di_f_utf_is_bom_ /** * Check to see if the entire byte block of the character is a UTF-8 whitespace or substitute character. * - * This will also return TRUE for the UTF-8 BOM. - * * This does not check non-UTF-8 whitespace. * * @param character * The character to validate. - * There must be enough space allocated to compare against, as limited by maxWidth. - * @param maxWidth + * There must be enough space allocated to compare against, as limited by max_width. + * @param max_width * The maximum width available for checking. * Can be anything greater than 0. * * @return - * f_true if a UTF-8 whitespace, substitute, or UTF-8 BOM. + * f_true if a UTF-8 whitespace or substitute. * f_false if not a UTF-8 whitespace or substitute. * f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. */ #ifndef _di_f_utf_is_space_ - extern f_return_status f_utf_is_space(const f_string character, const f_u_short maxWidth); + extern f_return_status f_utf_is_space(const f_string character, const f_u_short max_width); #endif // _di_f_utf_is_space_ /** @@ -233,8 +234,8 @@ extern "C" { * * @param character * The character to validate. - * There must be enough space allocated to compare against, as limited by maxWidth. - * @param maxWidth + * There must be enough space allocated to compare against, as limited by max_width. + * @param max_width * The maximum width available for checking. * Can be anything greater than 0. * @@ -245,7 +246,7 @@ extern "C" { * f_invalid_parameter (with error bit) if a parameter is invalid. 
*/ #ifndef _di_f_utf_is_substitute_ - extern f_return_status f_utf_is_substitute(const f_string character, const f_u_short maxWidth); + extern f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width); #endif // _di_f_utf_is_substitute_ /** @@ -255,8 +256,8 @@ extern "C" { * * @param character * The character to validate. - * There must be enough space allocated to compare against, as limited by maxWidth. - * @param maxWidth + * There must be enough space allocated to compare against, as limited by max_width. + * @param max_width * The maximum width available for checking. * Can be anything greater than 0. * @@ -267,7 +268,7 @@ extern "C" { * f_invalid_parameter (with error bit) if a parameter is invalid. */ #ifndef _di_f_utf_is_whitespace_ - extern f_return_status f_utf_is_whitespace(const f_string character, const f_u_short maxWidth); + extern f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width); #endif // _di_f_utf_is_whitespace_ #ifdef __cplusplus -- 1.8.3.1