* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_alpha()
* @see f_utf_is_alpha()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_control()
* @see f_utf_is_control()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_control()
* @see f_utf_is_control()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_emoji()
* @see f_utf_is_emoji()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_numeric()
* @see f_utf_is_numeric()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_punctuation()
* @see f_utf_is_punctuation()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_symbol()
* @see f_utf_is_symbol()
*/
* The number of bytes representing the character width.
*
* @return
- * F_true if a UTF-8 control character.
- * F_false if not a UTF-8 control character.
+ * F_true if a UTF-8 phonetic whitespace.
+ * F_false if not a UTF-8 phonetic whitespace.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_whitespace_modifier()
* @see f_utf_is_whitespace_modifier()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_word()
* @see f_utf_is_word()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_word_dash()
* @see f_utf_is_word_dash()
*/
* F_false if not a UTF-8 control character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_character_is_word_dash()
* @see f_utf_is_word_dash()
*/
return F_status_is_error(F_utf);
}
- if (private_f_utf_character_is_control(character, width) == F_true) {
+ if (private_f_utf_character_is_control(character, width)) {
return F_false;
}
- if (private_f_utf_character_is_whitespace(character, width) == F_true) {
+ if (private_f_utf_character_is_whitespace(character, width)) {
return F_false;
}
- if (private_f_utf_character_is_zero_width(character, width) == F_true) {
+ if (private_f_utf_character_is_zero_width(character, width)) {
return F_false;
}
unsigned short width = f_macro_utf_character_width_is(character);
if (width == 0) {
- if (isdigit(f_macro_utf_character_to_char_1(character))) {
+ // ASCII: '!' to '#'.
+ if (character > 0x20000000 && character < 0x24000000) {
+ return F_true;
+ }
+
+ // ASCII: '%' to '*'.
+ if (character > 0x24000000 && character < 0x2b000000) {
+ return F_true;
+ }
+
+ // ASCII: ',' to '/'.
+ if (character > 0x2b000000 && character < 0x30000000) {
+ return F_true;
+ }
+
+ // ASCII: ':', ';', '?', or '@'.
+ if (character == 0x3a000000 || character == 0x3b000000 || character == 0x3f000000 || character == 0x40000000) {
+ return F_true;
+ }
+
+ // ASCII: '[' to ']'.
+ if (character > 0x5a000000 && character < 0x5d000000) {
+ return F_true;
+ }
+
+ // ASCII: '_', '{', or '}'.
+ if (character == 0x5f000000 || character == 0x7b000000 || character == 0x7d000000) {
return F_true;
}
}
#endif // _di_f_utf_character_is_punctuation_
+#ifndef _di_f_utf_character_is_symbol_
+  // Check whether the given character is an ASCII or UTF-8 symbol.
+  //
+  // ASCII characters (width 0) are matched against the fixed list below.
+  // A width of 1 is reported as F_utf with the error bit set.
+  // Every other width is delegated to private_f_utf_character_is_symbol().
+  f_return_status f_utf_character_is_symbol(const f_utf_character character) {
+    const unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) {
+      // ASCII: '$' or '+'.
+      if (character == 0x24000000 || character == 0x2b000000) {
+        return F_true;
+      }
+
+      // ASCII: '<' to '>' (0x3c, 0x3d and 0x3e; both end points are included).
+      if (character >= 0x3c000000 && character < 0x3f000000) {
+        return F_true;
+      }
+
+      // ASCII: '^', '`', '|', or '~'.
+      if (character == 0x5e000000 || character == 0x60000000 || character == 0x7c000000 || character == 0x7e000000) {
+        return F_true;
+      }
+
+      return F_false;
+    }
+
+    if (width == 1) {
+      // Set the error bit on the status; F_status_is_error() would only test for it.
+      return F_status_set_error(F_utf);
+    }
+
+    return private_f_utf_character_is_symbol(character, width);
+  }
+#endif // _di_f_utf_character_is_symbol_
+
#ifndef _di_f_utf_character_is_valid_
f_return_status f_utf_character_is_valid(const f_utf_character character) {
unsigned short width = f_macro_utf_character_width_is(character);
}
#endif // _di_f_utf_character_is_whitespace_
+#ifndef _di_f_utf_character_is_whitespace_modifier_
+  // Check whether the given character is a UTF-8 whitespace modifier.
+  //
+  // ASCII characters (width 0) are never whitespace modifiers.
+  // A width of 1 is reported as F_utf with the error bit set.
+  // Every other width is delegated to private_f_utf_character_is_whitespace_modifier().
+  f_return_status f_utf_character_is_whitespace_modifier(const f_utf_character character) {
+    const unsigned short width = f_macro_utf_character_width_is(character);
+
+    if (width == 0) {
+      // There are no ASCII whitespace modifiers.
+      return F_false;
+    }
+
+    if (width == 1) {
+      // Set the error bit on the status; F_status_is_error() would only test for it.
+      return F_status_set_error(F_utf);
+    }
+
+    return private_f_utf_character_is_whitespace_modifier(character, width);
+  }
+#endif // _di_f_utf_character_is_whitespace_modifier_
+
#ifndef _di_f_utf_character_is_word_
f_return_status f_utf_character_is_word(const f_utf_character character) {
unsigned short width = f_macro_utf_character_width_is(character);
if (status != F_none) return status;
}
- if (private_f_utf_character_is_control(character_utf, width) == F_true) {
+ if (private_f_utf_character_is_control(character_utf, width)) {
return F_false;
}
- if (private_f_utf_character_is_whitespace(character_utf, width) == F_true) {
+ if (private_f_utf_character_is_whitespace(character_utf, width)) {
return F_false;
}
// This test is in isolation so zero-width characters must be treated as a non-graph.
- if (private_f_utf_character_is_zero_width(character_utf, width) == F_true) {
+ if (private_f_utf_character_is_zero_width(character_utf, width)) {
return F_false;
}
uint8_t width = f_macro_utf_byte_width_is(*character);
if (width == 0) {
- if (isdigit(*character)) {
+ // ASCII: '!' to '#'.
+ if (character[0] > 0x20 && character[0] < 0x24) {
+ return F_true;
+ }
+
+ // ASCII: '%' to '*'.
+ if (character[0] > 0x24 && character[0] < 0x2b) {
+ return F_true;
+ }
+
+ // ASCII: ',' to '/'.
+ if (character[0] > 0x2b && character[0] < 0x30) {
+ return F_true;
+ }
+
+ // ASCII: ':', ';', '?', or '@'.
+ if (character[0] == 0x3a || character[0] == 0x3b || character[0] == 0x3f || character[0] == 0x40) {
+ return F_true;
+ }
+
+ // ASCII: '[' to ']'.
+ if (character[0] > 0x5a && character[0] < 0x5d) {
+ return F_true;
+ }
+
+ // ASCII: '_', '{', or '}'.
+ if (character[0] == 0x5f || character[0] == 0x7b || character[0] == 0x7d) {
return F_true;
}
}
#endif // _di_f_utf_is_punctuation_
+#ifndef _di_f_utf_is_symbol_
+  // Check whether the byte sequence at character is an ASCII or UTF-8 symbol.
+  //
+  // ASCII bytes (width 0) are matched against the fixed list below.
+  // A width of 1 is reported as F_incomplete_utf with the error bit set.
+  // Every other width is converted with f_utf_char_to_character() and then
+  // delegated to private_f_utf_character_is_symbol().
+  f_return_status f_utf_is_symbol(const f_string character, const f_string_length width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    const uint8_t width = f_macro_utf_byte_width_is(*character);
+
+    if (width == 0) {
+      // ASCII: '$' or '+'.
+      if (character[0] == 0x24 || character[0] == 0x2b) {
+        return F_true;
+      }
+
+      // ASCII: '<' to '>' (0x3c, 0x3d and 0x3e; both end points are included).
+      if (character[0] >= 0x3c && character[0] <= 0x3e) {
+        return F_true;
+      }
+
+      // ASCII: '^', '`', '|', or '~'.
+      if (character[0] == 0x5e || character[0] == 0x60 || character[0] == 0x7c || character[0] == 0x7e) {
+        return F_true;
+      }
+
+      return F_false;
+    }
+
+    if (width == 1) {
+      // Set the error bit on the status; F_status_is_error() would only test for it.
+      return F_status_set_error(F_incomplete_utf);
+    }
+
+    f_utf_character character_utf = 0;
+
+    {
+      const f_status status = f_utf_char_to_character(character, width_max, &character_utf);
+
+      // Any result other than F_none is passed back to the caller unchanged.
+      if (status != F_none) return status;
+    }
+
+    return private_f_utf_character_is_symbol(character_utf, width);
+  }
+#endif // _di_f_utf_is_symbol_
+
#ifndef _di_f_utf_is_valid_
f_return_status f_utf_is_valid(const f_string character, const f_string_length width_max) {
#ifndef _di_level_0_parameter_checking_
uint8_t width = f_macro_utf_byte_width_is(*character);
if (width == 0) {
- if (isdigit(*character)) {
- return F_true;
- }
-
+ // There are no ASCII whitespace modifiers.
return F_false;
}
* F_false if not a UTF-8 alphabet character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isalpha()
*/
#ifndef _di_f_utf_character_is_alpha_
extern f_return_status f_utf_character_is_alpha(const f_utf_character character);
* F_false if not a UTF-8 alpha-numeric character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isalnum()
*/
#ifndef _di_f_utf_character_is_alpha_numeric_
extern f_return_status f_utf_character_is_alpha_numeric(const f_utf_character character);
* F_true if a UTF-8 emoji character.
* F_false if not a UTF-8 emoji character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
- *
- * @see iscntrl()
*/
#ifndef _di_f_utf_character_is_emoji_
extern f_return_status f_utf_character_is_emoji(const f_utf_character character);
* F_false if not a UTF-8 numeric character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isdigit()
* @see f_utf_is_numeric()
*/
#ifndef _di_f_utf_character_is_numeric_
* F_false if not a UTF-8 punctuation character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_is_punctuation()
*/
#ifndef _di_f_utf_character_is_punctuation_
* F_false if not a UTF-8 symbol character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
* @see f_utf_is_symbol()
*/
#ifndef _di_f_utf_character_is_symbol_
*
* @see f_utf_character_is()
* @see f_utf_character_is_fragment()
- * @see f_utf_is_valid()
*/
#ifndef _di_f_utf_character_is_valid_
extern f_return_status f_utf_character_is_valid(const f_utf_character character);
*
* Non-printing or zero-width characters are not considered whitespace.
* This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not whitespace.
*
* @param character
* The character to validate.
* F_false if not a UTF-8 whitespace.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see f_utf_is_whitespace()
+ * @see isspace()
*/
#ifndef _di_f_utf_character_is_whitespace_
extern f_return_status f_utf_character_is_whitespace(const f_utf_character character);
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 whitespace modifier character.
*
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
* @param character
* The character to validate.
*
* F_true if a UTF-8 modifier character.
* F_false if not a UTF-8 modifier character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
- *
- * @see iscntrl()
- * @see f_utf_is_whitespace_modifier()
*/
#ifndef _di_f_utf_character_is_whitespace_modifier_
extern f_return_status f_utf_character_is_whitespace_modifier(const f_utf_character character);
* F_false if not a UTF-8 word character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isalnum()
* @see f_utf_is_word()
*/
#ifndef _di_f_utf_character_is_word_
* F_false if not a UTF-8 word or dash character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isalnum()
* @see f_utf_is_word_dash()
*/
#ifndef _di_f_utf_character_is_word_dash_
* F_false if not a UTF-8 word or dash character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
- * @see iscntrl()
+ * @see isalnum()
* @see f_utf_is_word_dash()
*/
#ifndef _di_f_utf_character_is_word_dash_plus_
* F_true if a UTF-8 non-printing or zero-width character.
* F_false if not a UTF-8 non-printing or zero-width character.
* F_utf (with error bit) if character is an invalid UTF-8 character.
- *
- * @see f_utf_is_zero_width()
*/
#ifndef _di_f_utf_character_is_zero_width_
extern f_return_status f_utf_character_is_zero_width(const f_utf_character character);
#endif // _di_f_utf_character_is_zero_width_
/**
- * Check to see if the entire byte block of the character is an word character.
- *
- * A word character is alphanumeric or underscore '_'.
- *
- * @todo Incomplete, UTF-8 codes not yet checked!
- *
- * @param character
- * The character to validate.
- *
- * @return
- * F_true if a UTF-8 alpha character.
- * F_false if not a UTF-8 alpha character.
- * F_utf (with error bit) if character is an invalid UTF-8 character.
- *
- * @see iscntrl()
- * @see f_utf_is_word()
- */
-#ifndef _di_f_utf_character_is_word_
- extern f_return_status f_utf_character_is_word(const f_utf_character character);
-#endif // _di_f_utf_character_is_word_
-
-/**
* Convert a specialized f_utf_character type to a int8_t, stored as a string (character buffer).
*
* This will also convert ASCII characters stored in the utf_character array.
* F_false if not a UTF-8 alphabet character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isalpha()
*/
#ifndef _di_f_utf_is_alpha_
extern f_return_status f_utf_is_alpha(const f_string character, const f_string_length width_max);
* F_false if not a UTF-8 alpha-numeric character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isalnum()
*/
#ifndef _di_f_utf_is_alpha_numeric_
extern f_return_status f_utf_is_alpha_numeric(const f_string character, const f_string_length width_max);
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
* @see iscntrl()
- * @see f_utf_character_is_control()
*/
#ifndef _di_f_utf_is_control_
extern f_return_status f_utf_is_control(const f_string character, const f_string_length width_max);
* F_true if a UTF-8 emoji character.
* F_false if not a UTF-8 emoji character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_emoji_
extern f_return_status f_utf_is_emoji(const f_string character, const f_string_length width_max);
* F_parameter (with error bit) if a parameter is invalid.
*
* @see isgraph()
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_graph_
extern f_return_status f_utf_is_graph(const f_string character, const f_string_length width_max);
* F_false if not a UTF-8 numeric character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isdigit()
*/
#ifndef _di_f_utf_is_numeric_
extern f_return_status f_utf_is_numeric(const f_string character, const f_string_length width_max);
* F_true if a UTF-8 punctuation character.
* F_false if not a UTF-8 punctuation character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_punctuation_
extern f_return_status f_utf_is_punctuation(const f_string character, const f_string_length width_max);
* F_true if a UTF-8 symbol character.
* F_false if not a UTF-8 symbol character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- *
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_symbol_
extern f_return_status f_utf_is_symbol(const f_string character, const f_string_length width_max);
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 general space character.
*
+ * Non-printing or zero-width characters are not considered whitespace.
+ * This does include line separators like '\n'.
+ * This does not include phonetic spaces, like whitespace modifiers.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * However, because they are not rendered as whitespace, they are technically not whitespace.
+ *
* @param character
* The character to validate.
* There must be enough space allocated to compare against, as limited by width_max.
* F_parameter (with error bit) if a parameter is invalid.
*
* @see isspace()
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_whitespace_
extern f_return_status f_utf_is_whitespace(const f_string character, const f_string_length width_max);
#endif // _di_f_utf_is_whitespace_
/**
+ * Check to see if the entire byte block of the character is a UTF-8 whitespace modifier character.
+ *
+ * These are phonetic spaces.
+ *
+ * Phonetic spaces are whitespaces with additional phonetic meaning associated with them.
+ * Therefore, these are valid spaces in the technical sense, even if they are not visibly whitespace.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 whitespace modifier.
+ * F_false if not a UTF-8 whitespace modifier.
+ * F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ * F_maybe (with error bit) if this could be a whitespace modifier but width is not long enough.
+ * F_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_f_utf_is_whitespace_modifier_
+ extern f_return_status f_utf_is_whitespace_modifier(const f_string character, const f_string_length width_max);
+#endif // _di_f_utf_is_whitespace_modifier_
+
+/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 word character.
*
* A word character is alpha-numeric or an underscore '_'.
* F_false if not a UTF-8 word character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isalnum()
*/
#ifndef _di_f_utf_is_word_
extern f_return_status f_utf_is_word(const f_string character, const f_string_length width_max);
* F_false if not a UTF-8 word or dash character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isalnum()
*/
#ifndef _di_f_utf_is_word_dash_
extern f_return_status f_utf_is_word_dash(const f_string character, const f_string_length width_max);
* F_false if not a UTF-8 word or dash character.
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
*
- * @see iscntrl()
+ * @see isalnum()
*/
#ifndef _di_f_utf_is_word_dash_plus_
extern f_return_status f_utf_is_word_dash_plus(const f_string character, const f_string_length width_max);
* F_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* F_maybe (with error bit) if this could be a whitespace but width is not long enough.
* F_parameter (with error bit) if a parameter is invalid.
- *
- * @see isspace()
- * @see iscntrl()
*/
#ifndef _di_f_utf_is_zero_width_
extern f_return_status f_utf_is_zero_width(const f_string character, const f_string_length width_max);