From 8f28e079e106e157ade2b566ecca24df1ca0b5d9 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Thu, 30 Apr 2020 00:21:07 -0500 Subject: [PATCH] Feature: implement UTF-8 special character check types (partial-stubs) Types are: - alpha - alpha-numeric - numeric - word (alpha-numeric and underscore '_') - word-dash (alpha-numeric, underscore '_', and dash '-') The ASCII portion of this is implemented and should work. The UTF-8/Unicode portion is completely unimplemented (aka: a stub). --- level_0/f_utf/c/private-utf.c | 45 ++++++ level_0/f_utf/c/private-utf.h | 115 +++++++++++++++ level_0/f_utf/c/utf.c | 270 ++++++++++++++++++++++++++++++++++ level_0/f_utf/c/utf.h | 250 +++++++++++++++++++++++++++++++ 4 files changed, 680 insertions(+) diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c index e9e026c83..752a77bcc 100644 --- a/level_0/f_utf/c/private-utf.c +++ b/level_0/f_utf/c/private-utf.c @@ -5,6 +5,24 @@ extern "C" { #endif +#if !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_) + f_return_status private_f_utf_character_is_alpha(const f_utf_character character, const uint8_t width) { + + // @todo: handle all Unicode "alpha". + + return f_false; + } +#endif // !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_) + +#if !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_) + f_return_status private_f_utf_character_is_alpha_numeric(const f_utf_character character, const uint8_t width) { + + // @todo: handle all Unicode "alpha_numeric". 
+ + return f_false; + } +#endif // !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_) + #if !defined(_di_f_utf_character_is_control_) || !defined(_di_f_utf_is_control_) f_return_status private_f_utf_character_is_control(const f_utf_character character, const uint8_t width) { if (width == 2) { @@ -53,6 +71,15 @@ extern "C" { } #endif // !defined(_di_f_utf_character_is_control_picture_) || !defined(_di_f_utf_is_control_picture_) +#if !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_) + f_return_status private_f_utf_character_is_numeric(const f_utf_character character, const uint8_t width) { + + // @todo: handle all Unicode "numeric". + + return f_false; + } +#endif // !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_) + #if !defined(_di_f_utf_character_is_valid_) || !defined(_di_f_utf_is_valid_) f_return_status private_f_utf_character_is_valid(const f_utf_character character, const uint8_t width) { // reduce the number of checks by grouping checks by first byte. @@ -2425,6 +2452,24 @@ extern "C" { } #endif // !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_) +#if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_) + f_return_status private_f_utf_character_is_word(const f_utf_character character, const uint8_t width) { + + // @todo: handle all Unicode "word". + + return f_false; + } +#endif // !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_) + +#if !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_) + f_return_status private_f_utf_character_is_word_dash(const f_utf_character character, const uint8_t width) { + + // @todo: handle all Unicode "word_dash". 
+ + return f_false; + } +#endif // !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_) + #if !defined(_di_f_utf_character_is_zero_width_) || !defined(_di_f_utf_is_zero_width_) f_return_status private_f_utf_character_is_zero_width(const f_utf_character character) { // reduce the number of checks by grouping checks by first byte. diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h index 4236a1085..e89c277b4 100644 --- a/level_0/f_utf/c/private-utf.h +++ b/level_0/f_utf/c/private-utf.h @@ -17,6 +17,52 @@ extern "C" { #endif +/** + * Private implementation of f_utf_character_is_alpha(). + * + * Intended to be shared to each of the different implementation variations. + * + * @param character + * The character to validate. + * @param width + * The number of bytes representing the character width. + * + * @return + * f_true if a UTF-8 alphabet character. + * f_false if not a UTF-8 alphabet character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalpha() + * @see f_utf_character_is_alpha() + * @see f_utf_is_alpha() + */ +#if !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_) + extern f_return_status private_f_utf_character_is_alpha(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal; +#endif // !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_) + +/** + * Private implementation of f_utf_character_is_alpha_numeric(). + * + * Intended to be shared to each of the different implementation variations. + * + * @param character + * The character to validate. + * @param width + * The number of bytes representing the character width. + * + * @return + * f_true if a UTF-8 alpha-numeric character. + * f_false if not a UTF-8 alpha-numeric character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. 
+ * + * @see isalnum() + * @see f_utf_character_is_alpha_numeric() + * @see f_utf_is_alpha_numeric() + */ +#if !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_) + extern f_return_status private_f_utf_character_is_alpha_numeric(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal; +#endif // !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_) + /** * Private implementation of f_utf_character_is_control(). * @@ -60,6 +106,29 @@ extern "C" { extern f_return_status private_f_utf_character_is_control_picture(const f_utf_character character) f_gcc_attribute_visibility_internal; #endif // !defined(_di_f_utf_character_is_control_picture_) || !defined(_di_f_utf_is_control_picture_) +/** + * Private implementation of f_utf_character_is_numeric(). + * + * Intended to be shared to each of the different implementation variations. + * + * @param character + * The character to validate. + * @param width + * The number of bytes representing the character width. + * + * @return + * f_true if a UTF-8 numeric character. + * f_false if not a UTF-8 numeric character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isdigit() + * @see f_utf_character_is_numeric() + * @see f_utf_is_numeric() + */ +#if !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_) + extern f_return_status private_f_utf_character_is_numeric(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal; +#endif // !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_) + /** * Private implementation of f_utf_character_is_valid(). 
* @@ -102,6 +171,52 @@ extern "C" { extern f_return_status private_f_utf_character_is_whitespace(const f_utf_character character) f_gcc_attribute_visibility_internal; #endif // !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_) +/** + * Private implementation of f_utf_character_is_word(). + * + * Intended to be shared to each of the different implementation variations. + * + * @param character + * The character to validate. + * @param width + * The number of bytes representing the character width. + * + * @return + * f_true if a UTF-8 word character. + * f_false if not a UTF-8 word character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalnum() + * @see f_utf_character_is_word() + * @see f_utf_is_word() + */ +#if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_) + extern f_return_status private_f_utf_character_is_word(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal; +#endif // !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_) + +/** + * Private implementation of f_utf_character_is_word_dash(). + * + * Intended to be shared to each of the different implementation variations. + * + * @param character + * The character to validate. + * @param width + * The number of bytes representing the character width. + * + * @return + * f_true if a UTF-8 word or dash character. + * f_false if not a UTF-8 word or dash character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. 
+ * + * @see isalnum() + * @see f_utf_character_is_word_dash() + * @see f_utf_is_word_dash() + */ +#if !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_) + extern f_return_status private_f_utf_character_is_word_dash(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal; +#endif // !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_) + /** * Private implementation of f_utf_character_is_zero_width(). * diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c index 8ff3ce3c3..b7ea77fa3 100644 --- a/level_0/f_utf/c/utf.c +++ b/level_0/f_utf/c/utf.c @@ -21,6 +21,46 @@ extern "C" { } #endif // _di_f_utf_character_is_ +#ifndef _di_f_utf_character_is_alpha_ + f_return_status f_utf_character_is_alpha(const f_utf_character character) { + unsigned short width = f_macro_utf_character_width_is(character); + + if (width == 0) { + if (isalpha(f_macro_utf_character_to_char_1(character))) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_invalid_utf); + } + + return private_f_utf_character_is_alpha(character, width); + } +#endif // _di_f_utf_character_is_alpha_ + +#ifndef _di_f_utf_character_is_alpha_numeric_ + f_return_status f_utf_character_is_alpha_numeric(const f_utf_character character) { + unsigned short width = f_macro_utf_character_width_is(character); + + if (width == 0) { + if (isalnum(f_macro_utf_character_to_char_1(character))) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_invalid_utf); + } + + return private_f_utf_character_is_alpha_numeric(character, width); + } +#endif // _di_f_utf_character_is_alpha_numeric_ + #ifndef _di_f_utf_character_is_control_ f_return_status f_utf_character_is_control(const f_utf_character character) { unsigned short width = f_macro_utf_character_width_is(character); @@ -105,6 +145,26 @@ extern "C" { } #endif // _di_f_utf_character_is_graph_ +#ifndef 
_di_f_utf_character_is_numeric_ + f_return_status f_utf_character_is_numeric(const f_utf_character character) { + unsigned short width = f_macro_utf_character_width_is(character); + + if (width == 0) { + if (isdigit(f_macro_utf_character_to_char_1(character))) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_invalid_utf); + } + + return private_f_utf_character_is_numeric(character, width); + } +#endif // _di_f_utf_character_is_numeric_ + #ifndef _di_f_utf_character_is_valid_ f_return_status f_utf_character_is_valid(const f_utf_character character) { unsigned short width = f_macro_utf_character_width_is(character); @@ -137,6 +197,46 @@ extern "C" { } #endif // _di_f_utf_character_is_whitespace_ +#ifndef _di_f_utf_character_is_word_ + f_return_status f_utf_character_is_word(const f_utf_character character) { + unsigned short width = f_macro_utf_character_width_is(character); + + if (width == 0) { + if (isalnum(f_macro_utf_character_to_char_1(character)) || f_macro_utf_character_to_char_1(character) == '_') { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_invalid_utf); + } + + return private_f_utf_character_is_word(character, width); + } +#endif // _di_f_utf_character_is_word_ + +#ifndef _di_f_utf_character_is_word_dash_ + f_return_status f_utf_character_is_word_dash(const f_utf_character character) { + unsigned short width = f_macro_utf_character_width_is(character); + + if (width == 0) { + if (isalnum(f_macro_utf_character_to_char_1(character)) || f_macro_utf_character_to_char_1(character) == '_' || f_macro_utf_character_to_char_1(character) == '-') { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_invalid_utf); + } + + return private_f_utf_character_is_word_dash(character, width); + } +#endif // _di_f_utf_character_is_word_dash_ + #ifndef _di_f_utf_character_is_zero_width_ f_return_status f_utf_character_is_zero_width(const f_utf_character character) { if (f_macro_utf_character_width_is(character) == 1) { @@ -236,6 
+336,74 @@ extern "C" { } #endif // _di_f_utf_is_ +#ifndef _di_f_utf_is_alpha_ + f_return_status f_utf_is_alpha(const f_string character, const uint8_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return f_status_set_error(f_invalid_parameter); + #endif // _di_level_0_parameter_checking_ + + uint8_t width = f_macro_utf_byte_width_is(*character); + + if (width == 0) { + if (isalpha(*character)) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_incomplete_utf); + } + + f_utf_character character_utf = 0; + + { + f_status status = 0; + + status = f_utf_char_to_character(character, width_max, &character_utf); + + if (status != f_none) return status; + } + + return private_f_utf_character_is_alpha(character_utf, width); + } +#endif // _di_f_utf_is_alpha_ + +#ifndef _di_f_utf_is_alpha_numeric_ + f_return_status f_utf_is_alpha_numeric(const f_string character, const uint8_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return f_status_set_error(f_invalid_parameter); + #endif // _di_level_0_parameter_checking_ + + uint8_t width = f_macro_utf_byte_width_is(*character); + + if (width == 0) { + if (isalnum(*character)) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_incomplete_utf); + } + + f_utf_character character_utf = 0; + + { + f_status status = 0; + + status = f_utf_char_to_character(character, width_max, &character_utf); + + if (status != f_none) return status; + } + + return private_f_utf_character_is_alpha_numeric(character_utf, width); + } +#endif // _di_f_utf_is_alpha_numeric_ + #ifndef _di_f_utf_is_control_ f_return_status f_utf_is_control(const f_string character, const uint8_t width_max) { #ifndef _di_level_0_parameter_checking_ @@ -348,6 +516,40 @@ extern "C" { } #endif // _di_f_utf_is_graph_ +#ifndef _di_f_utf_is_numeric_ + f_return_status f_utf_is_numeric(const f_string character, const uint8_t 
width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return f_status_set_error(f_invalid_parameter); + #endif // _di_level_0_parameter_checking_ + + uint8_t width = f_macro_utf_byte_width_is(*character); + + if (width == 0) { + if (isdigit(*character)) { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_incomplete_utf); + } + + f_utf_character character_utf = 0; + + { + f_status status = 0; + + status = f_utf_char_to_character(character, width_max, &character_utf); + + if (status != f_none) return status; + } + + return private_f_utf_character_is_numeric(character_utf, width); + } +#endif // _di_f_utf_is_numeric_ + #ifndef _di_f_utf_is_valid_ f_return_status f_utf_is_valid(const f_string character, const uint8_t width_max) { #ifndef _di_level_0_parameter_checking_ @@ -408,6 +610,74 @@ extern "C" { } #endif // _di_f_utf_is_whitespace_ +#ifndef _di_f_utf_is_word_ + f_return_status f_utf_is_word(const f_string character, const uint8_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return f_status_set_error(f_invalid_parameter); + #endif // _di_level_0_parameter_checking_ + + uint8_t width = f_macro_utf_byte_width_is(*character); + + if (width == 0) { + if (isalnum(*character) || *character == '_') { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_incomplete_utf); + } + + f_utf_character character_utf = 0; + + { + f_status status = 0; + + status = f_utf_char_to_character(character, width_max, &character_utf); + + if (status != f_none) return status; + } + + return private_f_utf_character_is_word(character_utf, width); + } +#endif // _di_f_utf_is_word_ + +#ifndef _di_f_utf_is_word_dash_ + f_return_status f_utf_is_word_dash(const f_string character, const uint8_t width_max) { + #ifndef _di_level_0_parameter_checking_ + if (width_max < 1) return f_status_set_error(f_invalid_parameter); + #endif // _di_level_0_parameter_checking_ 
+ + uint8_t width = f_macro_utf_byte_width_is(*character); + + if (width == 0) { + if (isalnum(*character) || *character == '_' || *character == '-') { + return f_true; + } + + return f_false; + } + + if (width == 1) { + return f_status_set_error(f_incomplete_utf); + } + + f_utf_character character_utf = 0; + + { + f_status status = 0; + + status = f_utf_char_to_character(character, width_max, &character_utf); + + if (status != f_none) return status; + } + + return private_f_utf_character_is_word_dash(character_utf, width); + } +#endif // _di_f_utf_is_word_dash_ + #ifndef _di_f_utf_is_zero_width_ f_return_status f_utf_is_zero_width(const f_string character, const uint8_t width_max) { #ifndef _di_level_0_parameter_checking_ diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h index 16829e981..abf79b960 100644 --- a/level_0/f_utf/c/utf.h +++ b/level_0/f_utf/c/utf.h @@ -519,6 +519,46 @@ extern "C" { extern f_return_status f_utf_character_is(const f_utf_character character); #endif // _di_f_utf_character_is_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 alphabet character. + * f_false if not a UTF-8 alphabet character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalpha() + * @see f_utf_is_alpha() + */ +#ifndef _di_f_utf_character_is_alpha_ + extern f_return_status f_utf_character_is_alpha(const f_utf_character character); +#endif // _di_f_utf_character_is_alpha_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 alpha-numeric character. + * f_false if not a UTF-8 alpha-numeric character. 
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalnum() + * @see f_utf_is_alpha_numeric() + */ +#ifndef _di_f_utf_character_is_alpha_numeric_ + extern f_return_status f_utf_character_is_alpha_numeric(const f_utf_character character); +#endif // _di_f_utf_character_is_alpha_numeric_ + /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character. * @@ -598,6 +638,26 @@ extern "C" { extern f_return_status f_utf_character_is_graph(const f_utf_character character); #endif // _di_f_utf_character_is_graph_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 numeric character. + * f_false if not a UTF-8 numeric character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isdigit() + * @see f_utf_is_numeric() + */ +#ifndef _di_f_utf_character_is_numeric_ + extern f_return_status f_utf_character_is_numeric(const f_utf_character character); +#endif // _di_f_utf_character_is_numeric_ + /** * Check to see if the entire byte block of the character is a valid UTF-8 character. * @@ -644,6 +704,50 @@ extern "C" { extern f_return_status f_utf_character_is_whitespace(const f_utf_character character); #endif // _di_f_utf_character_is_whitespace_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character. + * + * A word character is alpha-numeric or an underscore '_'. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 word character. + * f_false if not a UTF-8 word character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. 
+ * + * @see isalnum() + * @see f_utf_is_word() + */ +#ifndef _di_f_utf_character_is_word_ + extern f_return_status f_utf_character_is_word(const f_utf_character character); +#endif // _di_f_utf_character_is_word_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character. + * + * A word dash character is alpha-numeric, an underscore '_' or a dash '-'. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 word or dash character. + * f_false if not a UTF-8 word or dash character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalnum() + * @see f_utf_is_word_dash() + */ +#ifndef _di_f_utf_character_is_word_dash_ + extern f_return_status f_utf_character_is_word_dash(const f_utf_character character); +#endif // _di_f_utf_character_is_word_dash_ + /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character. * @@ -663,6 +767,28 @@ extern "C" { extern f_return_status f_utf_character_is_zero_width(const f_utf_character character); #endif // _di_f_utf_character_is_zero_width_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character. + * + * A word character is alpha-numeric or an underscore '_'. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * + * @return + * f_true if a UTF-8 word character. + * f_false if not a UTF-8 word character. + * f_invalid_utf (with error bit) if character is an invalid UTF-8 character. + * + * @see isalnum() + * @see f_utf_is_word() + */ +#ifndef _di_f_utf_character_is_word_ + extern f_return_status f_utf_character_is_word(const f_utf_character character); +#endif // _di_f_utf_character_is_word_ + /** * Convert a specialized f_utf_character type to a int8_t, stored as a string (character buffer). 
* @@ -729,6 +855,54 @@ extern "C" { extern f_return_status f_utf_is(const f_string character, const uint8_t width_max); #endif // _di_f_utf_is_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * f_true if a UTF-8 alphabet character. + * f_false if not a UTF-8 alphabet character. + * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment. + * + * @see isalpha() + * @see f_utf_character_is_alpha() + */ +#ifndef _di_f_utf_is_alpha_ + extern f_return_status f_utf_is_alpha(const f_string character, const uint8_t width_max); +#endif // _di_f_utf_is_alpha_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * f_true if a UTF-8 alpha-numeric character. + * f_false if not a UTF-8 alpha-numeric character. + * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment. + * + * @see isalnum() + * @see f_utf_character_is_alpha_numeric() + */ +#ifndef _di_f_utf_is_alpha_numeric_ + extern f_return_status f_utf_is_alpha_numeric(const f_string character, const uint8_t width_max); +#endif // _di_f_utf_is_alpha_numeric_ + /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character. 
* @@ -837,6 +1011,30 @@ extern "C" { extern f_return_status f_utf_is_graph(const f_string character, const uint8_t width_max); #endif // _di_f_utf_is_graph_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * f_true if a UTF-8 numeric character. + * f_false if not a UTF-8 numeric character. + * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment. + * + * @see isdigit() + * @see f_utf_character_is_numeric() + */ +#ifndef _di_f_utf_is_numeric_ + extern f_return_status f_utf_is_numeric(const f_string character, const uint8_t width_max); +#endif // _di_f_utf_is_numeric_ + /** * Check to see if the entire byte block of the character is a UTF-8 character and if that character is a valid UTF-8. * @@ -892,6 +1090,58 @@ extern "C" { extern f_return_status f_utf_is_whitespace(const f_string character, const uint8_t width_max); #endif // _di_f_utf_is_whitespace_ +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character. + * + * A word character is alpha-numeric or an underscore '_'. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * f_true if a UTF-8 word character. + * f_false if not a UTF-8 word character. + * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment. 
+ * + * @see isalnum() + * @see f_utf_character_is_word() + */ +#ifndef _di_f_utf_is_word_ + extern f_return_status f_utf_is_word(const f_string character, const uint8_t width_max); +#endif // _di_f_utf_is_word_ + +/** + * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character. + * + * A word dash character is alpha-numeric, an underscore '_' or a dash '-'. + * + * @todo Incomplete, UTF-8 codes not yet checked! + * + * @param character + * The character to validate. + * There must be enough space allocated to compare against, as limited by width_max. + * @param width_max + * The maximum width available for checking. + * Can be anything greater than 0. + * + * @return + * f_true if a UTF-8 word or dash character. + * f_false if not a UTF-8 word or dash character. + * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment. + * + * @see isalnum() + * @see f_utf_character_is_word_dash() + */ +#ifndef _di_f_utf_is_word_dash_ + extern f_return_status f_utf_is_word_dash(const f_string character, const uint8_t width_max); +#endif // _di_f_utf_is_word_dash_ + /** * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character. * -- 2.47.3