From 8f28e079e106e157ade2b566ecca24df1ca0b5d9 Mon Sep 17 00:00:00 2001
From: Kevin Day
Date: Thu, 30 Apr 2020 00:21:07 -0500
Subject: [PATCH] Feature: implement UTF-8 special character check types (partial-stubs)

Types are:
- alpha
- alpha-numeric
- numeric
- word (alpha-numeric and underscore '_')
- word-dash (alpha-numeric, underscore '_', and dash '-')

The ASCII portion of this implemented and should work.
The UTF-8/Unicode portion is completely unimplemented (aka: a stub).
---
 level_0/f_utf/c/private-utf.c |  45 +++++++
 level_0/f_utf/c/private-utf.h | 115 ++++++++++++++++++
 level_0/f_utf/c/utf.c         | 270 ++++++++++++++++++++++++++++++++++++++++++
 level_0/f_utf/c/utf.h         | 250 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 680 insertions(+)

diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c
index e9e026c..752a77b 100644
--- a/level_0/f_utf/c/private-utf.c
+++ b/level_0/f_utf/c/private-utf.c
@@ -5,6 +5,24 @@
 extern "C" {
 #endif
 
+#if !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_)
+  f_return_status private_f_utf_character_is_alpha(const f_utf_character character, const uint8_t width) {
+
+    // @todo: handle all Unicode "alpha" (stub: currently returns f_false for every input).
+
+    return f_false;
+  }
+#endif // !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_)
+
+#if !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_)
+  f_return_status private_f_utf_character_is_alpha_numeric(const f_utf_character character, const uint8_t width) {
+
+    // @todo: handle all Unicode "alpha_numeric" (stub: currently returns f_false for every input).
+
+    return f_false;
+  }
+#endif // !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_)
+
 #if !defined(_di_f_utf_character_is_control_) || !defined(_di_f_utf_is_control_)
   f_return_status private_f_utf_character_is_control(const f_utf_character character, const uint8_t width) {
     if (width == 2) {
@@ -53,6 +71,15 @@ extern "C" {
   }
 #endif // !defined(_di_f_utf_character_is_control_picture_) || !defined(_di_f_utf_is_control_picture_)
 
+#if !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_)
+  f_return_status private_f_utf_character_is_numeric(const f_utf_character character, const uint8_t width) {
+
+    // @todo: handle all Unicode "numeric" (stub: currently returns f_false for every input).
+
+    return f_false;
+  }
+#endif // !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_)
+
 #if !defined(_di_f_utf_character_is_valid_) || !defined(_di_f_utf_is_valid_)
   f_return_status private_f_utf_character_is_valid(const f_utf_character character, const uint8_t width) {
     // reduce the number of checks by grouping checks by first byte.
@@ -2425,6 +2452,24 @@ extern "C" {
   }
 #endif // !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_)
 
+#if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
+  f_return_status private_f_utf_character_is_word(const f_utf_character character, const uint8_t width) {
+
+    // @todo: handle all Unicode "word" (stub: currently returns f_false for every input).
+
+    return f_false;
+  }
+#endif // !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
+
+#if !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_)
+  f_return_status private_f_utf_character_is_word_dash(const f_utf_character character, const uint8_t width) {
+
+    // @todo: handle all Unicode "word_dash" (stub: currently returns f_false for every input).
+
+    return f_false;
+  }
+#endif // !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_)
+
 #if !defined(_di_f_utf_character_is_zero_width_) || !defined(_di_f_utf_is_zero_width_)
   f_return_status private_f_utf_character_is_zero_width(const f_utf_character character) {
     // reduce the number of checks by grouping checks by first byte.
diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h
index 4236a10..e89c277 100644
--- a/level_0/f_utf/c/private-utf.h
+++ b/level_0/f_utf/c/private-utf.h
@@ -18,6 +18,52 @@ extern "C" {
 #endif
 
 /**
+ * Private implementation of f_utf_character_is_alpha().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ *   The character to validate.
+ * @param width
+ *   The number of bytes representing the character width.
+ *
+ * @return
+ *   f_true if a UTF-8 alphabet character (@todo: the stub in private-utf.c currently never returns this).
+ *   f_false if not a UTF-8 alphabet character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalpha()
+ * @see f_utf_character_is_alpha()
+ * @see f_utf_is_alpha()
+ */
+#if !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_)
+  extern f_return_status private_f_utf_character_is_alpha(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal;
+#endif // !defined(_di_f_utf_character_is_alpha_) || !defined(_di_f_utf_is_alpha_)
+
+/**
+ * Private implementation of f_utf_character_is_alpha_numeric().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ *   The character to validate.
+ * @param width
+ *   The number of bytes representing the character width.
+ *
+ * @return
+ *   f_true if a UTF-8 alpha-numeric character (@todo: the stub in private-utf.c currently never returns this).
+ *   f_false if not a UTF-8 alpha-numeric character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_alpha_numeric()
+ * @see f_utf_is_alpha_numeric()
+ */
+#if !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_)
+  extern f_return_status private_f_utf_character_is_alpha_numeric(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal;
+#endif // !defined(_di_f_utf_character_is_alpha_numeric_) || !defined(_di_f_utf_is_alpha_numeric_)
+
+/**
  * Private implementation of f_utf_character_is_control().
  *
  * Intended to be shared to each of the different implementation variations.
@@ -61,6 +107,29 @@ extern "C" {
 #endif // !defined(_di_f_utf_character_is_control_picture_) || !defined(_di_f_utf_is_control_picture_)
 
 /**
+ * Private implementation of f_utf_character_is_numeric().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ *   The character to validate.
+ * @param width
+ *   The number of bytes representing the character width.
+ *
+ * @return
+ *   f_true if a UTF-8 numeric character (@todo: the stub in private-utf.c currently never returns this).
+ *   f_false if not a UTF-8 numeric character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isdigit()
+ * @see f_utf_character_is_numeric()
+ * @see f_utf_is_numeric()
+ */
+#if !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_)
+  extern f_return_status private_f_utf_character_is_numeric(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal;
+#endif // !defined(_di_f_utf_character_is_numeric_) || !defined(_di_f_utf_is_numeric_)
+
+/**
  * Private implementation of f_utf_character_is_valid().
  *
  * Intended to be shared to each of the different implementation variations.
@@ -103,6 +172,52 @@ extern "C" {
 #endif // !defined(_di_f_utf_character_is_whitespace_) || !defined(_di_f_utf_is_whitespace_)
 
 /**
+ * Private implementation of f_utf_character_is_word().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ *   The character to validate.
+ * @param width
+ *   The number of bytes representing the character width.
+ *
+ * @return
+ *   f_true if a UTF-8 word character (@todo: the stub in private-utf.c currently never returns this).
+ *   f_false if not a UTF-8 word character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_word()
+ * @see f_utf_is_word()
+ */
+#if !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
+  extern f_return_status private_f_utf_character_is_word(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal;
+#endif // !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_is_word_)
+
+/**
+ * Private implementation of f_utf_character_is_word_dash().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ *   The character to validate.
+ * @param width
+ *   The number of bytes representing the character width.
+ *
+ * @return
+ *   f_true if a UTF-8 word or dash character (@todo: the stub in private-utf.c currently never returns this).
+ *   f_false if not a UTF-8 word or dash character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_word_dash()
+ * @see f_utf_is_word_dash()
+ */
+#if !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_)
+  extern f_return_status private_f_utf_character_is_word_dash(const f_utf_character character, const uint8_t width) f_gcc_attribute_visibility_internal;
+#endif // !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_is_word_dash_)
+
+/**
  * Private implementation of f_utf_character_is_zero_width().
  *
  * Intended to be shared to each of the different implementation variations.
diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c
index 8ff3ce3..b7ea77f 100644
--- a/level_0/f_utf/c/utf.c
+++ b/level_0/f_utf/c/utf.c
@@ -21,6 +21,46 @@ extern "C" {
   }
 #endif // _di_f_utf_character_is_
 
+#ifndef _di_f_utf_character_is_alpha_
+  f_return_status f_utf_character_is_alpha(const f_utf_character character) {
+    unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify its first byte with the C library.
+      if (isalpha(f_macro_utf_character_to_char_1(character))) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    return private_f_utf_character_is_alpha(character, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_character_is_alpha_
+
+#ifndef _di_f_utf_character_is_alpha_numeric_
+  f_return_status f_utf_character_is_alpha_numeric(const f_utf_character character) {
+    unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify its first byte with the C library.
+      if (isalnum(f_macro_utf_character_to_char_1(character))) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    return private_f_utf_character_is_alpha_numeric(character, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_character_is_alpha_numeric_
+
 #ifndef _di_f_utf_character_is_control_
   f_return_status f_utf_character_is_control(const f_utf_character character) {
     unsigned short width = f_macro_utf_character_width_is(character);
@@ -105,6 +145,26 @@ extern "C" {
   }
 #endif // _di_f_utf_character_is_graph_
 
+#ifndef _di_f_utf_character_is_numeric_
+  f_return_status f_utf_character_is_numeric(const f_utf_character character) {
+    unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify its first byte with the C library.
+      if (isdigit(f_macro_utf_character_to_char_1(character))) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    return private_f_utf_character_is_numeric(character, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_character_is_numeric_
+
 #ifndef
_di_f_utf_character_is_valid_
   f_return_status f_utf_character_is_valid(const f_utf_character character) {
     unsigned short width = f_macro_utf_character_width_is(character);
@@ -137,6 +197,46 @@ extern "C" {
   }
 #endif // _di_f_utf_character_is_whitespace_
 
+#ifndef _di_f_utf_character_is_word_
+  f_return_status f_utf_character_is_word(const f_utf_character character) {
+    unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) { // single-byte (ASCII) value: compare the extracted first byte, not the whole f_utf_character.
+      if (isalnum(f_macro_utf_character_to_char_1(character)) || f_macro_utf_character_to_char_1(character) == '_') {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    return private_f_utf_character_is_word(character, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_character_is_word_
+
+#ifndef _di_f_utf_character_is_word_dash_
+  f_return_status f_utf_character_is_word_dash(const f_utf_character character) {
+    unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) { // single-byte (ASCII) value: compare the extracted first byte, not the whole f_utf_character.
+      if (isalnum(f_macro_utf_character_to_char_1(character)) || f_macro_utf_character_to_char_1(character) == '_' || f_macro_utf_character_to_char_1(character) == '-') {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    return private_f_utf_character_is_word_dash(character, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_character_is_word_dash_
+
 #ifndef _di_f_utf_character_is_zero_width_
   f_return_status f_utf_character_is_zero_width(const f_utf_character character) {
     if (f_macro_utf_character_width_is(character) == 1) {
@@ -236,6 +336,74 @@ extern "C" {
   }
 #endif // _di_f_utf_is_
 
+#ifndef _di_f_utf_is_alpha_
+  f_return_status f_utf_is_alpha(const f_string character, const uint8_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify it with the C library.
+      if (isalpha(*character)) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      f_status status = 0;
+
+      status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      if (status != f_none) return status;
+    }
+
+    return private_f_utf_character_is_alpha(character_utf, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_is_alpha_
+
+#ifndef _di_f_utf_is_alpha_numeric_
+  f_return_status f_utf_is_alpha_numeric(const f_string character, const uint8_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify it with the C library.
+      if (isalnum(*character)) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      f_status status = 0;
+
+      status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      if (status != f_none) return status;
+    }
+
+    return private_f_utf_character_is_alpha_numeric(character_utf, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_is_alpha_numeric_
+
 #ifndef _di_f_utf_is_control_
   f_return_status f_utf_is_control(const f_string character, const uint8_t width_max) {
     #ifndef _di_level_0_parameter_checking_
@@ -348,6 +516,40 @@ extern "C" {
   }
 #endif // _di_f_utf_is_graph_
 
+#ifndef _di_f_utf_is_numeric_
+  f_return_status f_utf_is_numeric(const f_string character, const uint8_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) { // single-byte (ASCII) value: classify it with the C library.
+      if (isdigit(*character)) {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      f_status status = 0;
+
+      status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      if (status != f_none) return status;
+    }
+
+    return private_f_utf_character_is_numeric(character_utf, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_is_numeric_
+
 #ifndef _di_f_utf_is_valid_
   f_return_status f_utf_is_valid(const f_string character, const uint8_t width_max) {
     #ifndef _di_level_0_parameter_checking_
@@ -408,6 +610,74 @@ extern "C" {
   }
 #endif // _di_f_utf_is_whitespace_
 
+#ifndef _di_f_utf_is_word_
+  f_return_status f_utf_is_word(const f_string character, const uint8_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) { // single-byte (ASCII) value: a word character is alpha-numeric or '_'.
+      if (isalnum(*character) || *character == '_') {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      f_status status = 0;
+
+      status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      if (status != f_none) return status;
+    }
+
+    return private_f_utf_character_is_word(character_utf, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_is_word_
+
+#ifndef _di_f_utf_is_word_dash_
+  f_return_status f_utf_is_word_dash(const f_string character, const uint8_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) { // single-byte (ASCII) value: a word-dash character is alpha-numeric, '_' or '-'.
+      if (isalnum(*character) || *character == '_' || *character == '-') {
+        return f_true;
+      }
+
+      return f_false;
+    }
+
+    if (width == 1) {
+      return f_status_set_error(f_incomplete_utf); // set (rather than test) the error bit, per the utf.h contract.
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      f_status status = 0;
+
+      status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      if (status != f_none) return status;
+    }
+
+    return private_f_utf_character_is_word_dash(character_utf, width); // multi-byte: Unicode check (still a stub).
+  }
+#endif // _di_f_utf_is_word_dash_
+
 #ifndef _di_f_utf_is_zero_width_
   f_return_status f_utf_is_zero_width(const f_string character, const uint8_t width_max) {
     #ifndef _di_level_0_parameter_checking_
diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h
index 16829e9..abf79b9 100644
--- a/level_0/f_utf/c/utf.h
+++ b/level_0/f_utf/c/utf.h
@@ -520,6 +520,46 @@ extern "C" {
 #endif // _di_f_utf_character_is_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 alphabet character.
+ *   f_false if not a UTF-8 alphabet character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalpha()
+ * @see f_utf_is_alpha()
+ */
+#ifndef _di_f_utf_character_is_alpha_
+  extern f_return_status f_utf_character_is_alpha(const f_utf_character character);
+#endif // _di_f_utf_character_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabetic or numeric character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 alpha-numeric character.
+ *   f_false if not a UTF-8 alpha-numeric character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_is_alpha_numeric()
+ */
+#ifndef _di_f_utf_character_is_alpha_numeric_
+  extern f_return_status f_utf_character_is_alpha_numeric(const f_utf_character character);
+#endif // _di_f_utf_character_is_alpha_numeric_
+
+/**
  * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
*
  * @param character
@@ -599,6 +639,26 @@ extern "C" {
 #endif // _di_f_utf_character_is_graph_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 numeric character.
+ *   f_false if not a UTF-8 numeric character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isdigit()
+ * @see f_utf_is_numeric()
+ */
+#ifndef _di_f_utf_character_is_numeric_
+  extern f_return_status f_utf_character_is_numeric(const f_utf_character character);
+#endif // _di_f_utf_character_is_numeric_
+
+/**
  * Check to see if the entire byte block of the character is a valid UTF-8 character.
  *
  * This does validate if the UTF-8 character is a valid UTF-8 character.
@@ -645,6 +705,50 @@ extern "C" {
 #endif // _di_f_utf_character_is_whitespace_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-numeric or an underscore '_'.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 word character.
+ *   f_false if not a UTF-8 word character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_is_word()
+ */
+#ifndef _di_f_utf_character_is_word_
+  extern f_return_status f_utf_character_is_word(const f_utf_character character);
+#endif // _di_f_utf_character_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 word or dash character.
+ *   f_false if not a UTF-8 word or dash character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_is_word_dash()
+ */
+#ifndef _di_f_utf_character_is_word_dash_
+  extern f_return_status f_utf_character_is_word_dash(const f_utf_character character);
+#endif // _di_f_utf_character_is_word_dash_
+
+/**
  * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
  *
  * Only characters that do not print, which are generally called zero-width.
@@ -664,6 +768,28 @@ extern "C" {
 #endif // _di_f_utf_character_is_zero_width_
 
 /**
+ * Check to see if the entire byte block of the character is a word character.
+ *
+ * A word character is alphanumeric or underscore '_'.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked! (This repeats the f_utf_character_is_word() declaration found earlier in this header; keep only one.)
+ *
+ * @param character
+ *   The character to validate.
+ *
+ * @return
+ *   f_true if a UTF-8 word character.
+ *   f_false if not a UTF-8 word character.
+ *   f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isalnum()
+ * @see f_utf_is_word()
+ */
+#ifndef _di_f_utf_character_is_word_
+  extern f_return_status f_utf_character_is_word(const f_utf_character character);
+#endif // _di_f_utf_character_is_word_
+
+/**
  * Convert a specialized f_utf_character type to a int8_t, stored as a string (character buffer).
  *
  * This will also convert ASCII characters stored in the utf_character array.
@@ -730,6 +856,54 @@ extern "C" {
 #endif // _di_f_utf_is_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 alphabet character.
+ *   f_false if not a UTF-8 alphabet character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalpha()
+ * @see f_utf_character_is_alpha()
+ */
+#ifndef _di_f_utf_is_alpha_
+  extern f_return_status f_utf_is_alpha(const f_string character, const uint8_t width_max);
+#endif // _di_f_utf_is_alpha_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 alphabet or numeric character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 alpha-numeric character.
+ *   f_false if not a UTF-8 alpha-numeric character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_alpha_numeric()
+ */
+#ifndef _di_f_utf_is_alpha_numeric_
+  extern f_return_status f_utf_is_alpha_numeric(const f_string character, const uint8_t width_max);
+#endif // _di_f_utf_is_alpha_numeric_
+
+/**
  * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
  *
  * @param character
@@ -838,6 +1012,30 @@ extern "C" {
 #endif // _di_f_utf_is_graph_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 numeric character.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 numeric character.
+ *   f_false if not a UTF-8 numeric character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isdigit()
+ * @see f_utf_character_is_numeric()
+ */
+#ifndef _di_f_utf_is_numeric_
+  extern f_return_status f_utf_is_numeric(const f_string character, const uint8_t width_max);
+#endif // _di_f_utf_is_numeric_
+
+/**
  * Check to see if the entire byte block of the character is a UTF-8 character and if that character is a valid UTF-8.
  *
  * This does check the validity of the character, to not do this use f_utf_is().
@@ -893,6 +1091,58 @@ extern "C" {
 #endif // _di_f_utf_is_whitespace_
 
 /**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
+ *
+ * A word character is alpha-numeric or an underscore '_'.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 word character.
+ *   f_false if not a UTF-8 word character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_word()
+ */
+#ifndef _di_f_utf_is_word_
+  extern f_return_status f_utf_is_word(const f_string character, const uint8_t width_max);
+#endif // _di_f_utf_is_word_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 word or dash character.
+ *
+ * A word dash character is alpha-numeric, an underscore '_' or a dash '-'.
+ *
+ * @todo Incomplete, UTF-8 codes not yet checked!
+ *
+ * @param character
+ *   The character to validate.
+ *   There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ *   The maximum width available for checking.
+ *   Can be anything greater than 0.
+ *
+ * @return
+ *   f_true if a UTF-8 word or dash character.
+ *   f_false if not a UTF-8 word or dash character.
+ *   f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ *
+ * @see isalnum()
+ * @see f_utf_character_is_word_dash()
+ */
+#ifndef _di_f_utf_is_word_dash_
+  extern f_return_status f_utf_is_word_dash(const f_string character, const uint8_t width_max);
+#endif // _di_f_utf_is_word_dash_
+
+/**
  * Check to see if the entire byte block of the character is an ASCII or UTF-8 general non-printing character.
  *
  * Only characters that do not print, which are generally called zero-width.
-- 
1.8.3.1