From 0f8fba8d4852fce1901a1ac29378f0e34e5bd5a2 Mon Sep 17 00:00:00 2001
From: Kevin Day
Date: Sat, 31 Aug 2019 16:18:42 -0500
Subject: [PATCH] Update: provide f_utf_is() and f_utf_is_character() functions

---
 level_0/f_utf/c/utf.c | 34 ++++++++++++++++++++++++++++++++++
 level_0/f_utf/c/utf.h | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c
index e4e268e..8557a39 100644
--- a/level_0/f_utf/c/utf.c
+++ b/level_0/f_utf/c/utf.c
@@ -4,6 +4,25 @@
 extern "C" {
 #endif
 
+#ifndef _di_f_utf_is_
+  f_return_status f_utf_is(const f_string character, const f_u_short max_width) {
+    #ifndef _di_level_0_parameter_checking_
+      if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_u_short width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) {
+      return f_false;
+    }
+    else if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // a width of 1 is reported as an incomplete fragment, with the error bit set.
+    }
+
+    return f_true;
+  }
+#endif // _di_f_utf_is_
+
 #ifndef _di_f_utf_is_bom_
   f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width) {
     #ifndef _di_level_0_parameter_checking_
@@ -33,6 +52,21 @@ extern "C" {
   }
 #endif // _di_f_utf_is_bom_
 
+#ifndef _di_f_utf_is_character_
+  f_return_status f_utf_is_character(const f_utf_character character) {
+    f_u_short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) {
+      return f_false;
+    }
+    else if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // a width of 1 is reported as an incomplete fragment, with the error bit set.
+    }
+
+    return f_true;
+  }
+#endif // _di_f_utf_is_character_
+
 #ifndef _di_f_utf_is_graph_
   f_return_status f_utf_is_graph(const f_string character, const f_u_short max_width) {
     #ifndef _di_level_0_parameter_checking_
diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h
index d5eb5f2..5fc452e 100644
--- a/level_0/f_utf/c/utf.h
+++ b/level_0/f_utf/c/utf.h
@@ -235,6 +235,26 @@ extern "C" {
 #endif // _di_f_utf_substitute_
 
 /**
+ * Check to see if the entire byte block of the character is a UTF-8 character.
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by max_width.
+ * @param max_width
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 character.
+ *   f_false if not a UTF-8 character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_f_utf_is_
+  extern f_return_status f_utf_is(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_
+
+/**
  * Check to see if the entire byte block of the character is a UTF-8 BOM.
  *
  * @param character
@@ -256,6 +276,22 @@ extern "C" {
 #endif // _di_f_utf_is_bom_
 
 /**
+ * Check to see if the entire byte block of the character is a UTF-8 character.
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 character.
+ *   f_false if not a UTF-8 character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_f_utf_is_character_
+  extern f_return_status f_utf_is_character(const f_utf_character character);
+#endif // _di_f_utf_is_character_
+
+/**
  * Check to see if the entire byte block of the character is a UTF-8 printable character.
  *
  * This does not check non-UTF-8 graph.
-- 
1.8.3.1