The is alphabetic check needs to perform the is valid check because its default catch-all returns F_true.
Ideally, at some (probably distant) point in the future, the literal codes for alphabetic characters will be matched directly rather than by calling all of the other functions.
Once that is done, the is valid check can be removed.
Several of the is digit value assignments do not check whether value (the pointer) is NULL before writing through it.
Some of the byte sequences compared in the is word checks are incorrect.
Add missing f_utf_character_is_alphabetic_numeric().
Fix function name for f_utf_character_is_control_format().
Several is word checks for f_utf_char_t are improperly comparing the entire sequence to an ASCII value when only the first byte should be compared.
#include "private-utf_phonetic.h"
#include "private-utf_punctuation.h"
#include "private-utf_symbol.h"
+#include "private-utf_valid.h"
#include "private-utf_whitespace.h"
#include "private-utf_zero_width.h"
#if !defined(_di_f_utf_character_is_alphabetic_) || !defined(_di_f_utf_is_alphabetic_)
f_status_t private_f_utf_character_is_alphabetic(const f_utf_char_t sequence) {
+ if (!private_f_utf_character_is_valid(sequence)) {
+ return F_false;
+ }
+
if (private_f_utf_character_is_zero_width(sequence)) {
return F_false;
}
+ if (private_f_utf_character_is_combining(sequence)) {
+ return F_false;
+ }
+
// The is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(sequence)) {
return F_false;
#if !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_is_alphabetic_digit_)
f_status_t private_f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence, uint64_t * const value) {
+ if (!private_f_utf_character_is_valid(sequence)) {
+ return F_false;
+ }
+
if (private_f_utf_character_is_digit(sequence, value)) {
return F_true;
}
return F_false;
}
+ if (private_f_utf_character_is_combining(sequence)) {
+ return F_false;
+ }
+
// The is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(sequence)) {
return F_false;
#if !defined(_di_f_utf_character_is_alphabetic_numeric_) || !defined(_di_f_utf_is_alphabetic_numeric_)
f_status_t private_f_utf_character_is_alphabetic_numeric(const f_utf_char_t sequence) {
+ if (!private_f_utf_character_is_valid(sequence)) {
+ return F_false;
+ }
+
if (private_f_utf_character_is_numeric(sequence)) {
return F_true;
}
return F_false;
}
+ if (private_f_utf_character_is_combining(sequence)) {
+ return F_false;
+ }
+
// The is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(sequence)) {
return F_false;
// Tamil: U+0BF0.
if (sequence == 0xe0afb000) {
- *value = 10;
+ if (value) {
+ *value = 10;
+ }
return F_true;
}
// Tamil: U+0BF1.
if (sequence == 0xe0afb100) {
- *value = 100;
+ if (value) {
+ *value = 100;
+ }
return F_true;
}
// Tamil: U+0BF2.
- *value = 1000;
+ if (value) {
+ *value = 1000;
+ }
return F_true;
}
// Telugu: U+0C70.
if (sequence == 0xe0afb000) {
- *value = 10;
+ if (value) {
+ *value = 10;
+ }
return F_true;
}
// Telugu: U+0C71.
if (sequence == 0xe0afb100) {
- *value = 100;
+ if (value) {
+ *value = 100;
+ }
return F_true;
}
// Telugu: U+0C72.
- *value = 1000;
+ if (value) {
+ *value = 1000;
+ }
return F_true;
}
// Ethiopic: U+1372.
if (sequence == 0xe18db200) {
- *value = 10;
+ if (value) {
+ *value = 10;
+ }
return F_true;
}
// Ethiopic: U+1373.
if (sequence == 0xe18db300) {
- *value = 20;
+ if (value) {
+ *value = 20;
+ }
return F_true;
}
// Ethiopic: U+1374.
if (sequence == 0xe18db400) {
- *value = 30;
+ if (value) {
+ *value = 30;
+ }
return F_true;
}
// Ethiopic: U+1375.
if (sequence == 0xe18db500) {
- *value = 40;
+ if (value) {
+ *value = 40;
+ }
return F_true;
}
// Ethiopic: U+1376.
if (sequence == 0xe18db600) {
- *value = 50;
+ if (value) {
+ *value = 50;
+ }
return F_true;
}
// Ethiopic: U+1377.
if (sequence == 0xe18db700) {
- *value = 60;
+ if (value) {
+ *value = 60;
+ }
return F_true;
}
// Ethiopic: U+1378.
if (sequence == 0xe18db800) {
- *value = 70;
+ if (value) {
+ *value = 70;
+ }
return F_true;
}
// Ethiopic: U+1379.
if (sequence == 0xe18db900) {
- *value = 80;
+ if (value) {
+ *value = 80;
+ }
return F_true;
}
// Ethiopic: U+137A.
if (sequence == 0xe18dba00) {
- *value = 90;
+ if (value) {
+ *value = 90;
+ }
return F_true;
}
// Ethiopic: U+137B.
if (sequence == 0xe18dbb00) {
- *value = 100;
+ if (value) {
+ *value = 100;
+ }
return F_true;
}
// Ethiopic: U+137C.
- *value = 1000;
+ if (value) {
+ *value = 1000;
+ }
return F_true;
}
// Runic: U+16EE.
if (sequence == 0xe19bae00) {
- *value = 17;
+ if (value) {
+ *value = 17;
+ }
return F_true;
}
// Runic: U+16EF.
if (sequence == 0xe19baf00) {
- *value = 18;
+ if (value) {
+ *value = 18;
+ }
return F_true;
}
// Runic: U+16F0.
- *value = 19;
+ if (value) {
+ *value = 19;
+ }
return F_true;
}
// New Tai Lue: U+19DA.
if (sequence == 0xe1a79a00) {
- *value = 1;
+ if (value) {
+ *value = 1;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2070.
if (sequence == 0xe281b000) {
- *value = 0;
+ if (value) {
+ *value = 0;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2074.
if (sequence == 0xe281b400) {
- *value = 4;
+ if (value) {
+ *value = 4;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2075.
if (sequence == 0xe281b500) {
- *value = 5;
+ if (value) {
+ *value = 5;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2076.
if (sequence == 0xe281b600) {
- *value = 6;
+ if (value) {
+ *value = 6;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2077.
if (sequence == 0xe281b700) {
- *value = 7;
+ if (value) {
+ *value = 7;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2078.
if (sequence == 0xe281b800) {
- *value = 8;
+ if (value) {
+ *value = 8;
+ }
return F_true;
}
// Superscripts and Subscripts: U+2079.
if (sequence == 0xe281b900) {
- *value = 9;
+ if (value) {
+ *value = 9;
+ }
return F_true;
}
if (strict) {
// Halfwidth and Fullwidth Forms: U+FE33 (︳), U+FE34 (︴).
- if (sequence == 0xefbcbf00 || sequence == 0xefbcbf00) {
+ if (sequence == 0xefb8b300 || sequence == 0xefb8b400) {
return F_true;
}
}
const f_status_t status = private_f_utf_char_to_character(sequence, width_max, &utf);
if (F_status_is_error(status)) return status;
}
-
return private_f_utf_character_is_word_dash_plus(utf, strict);
}
* F_false if not an unassigned UTF-8 character.
*
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
- * F_parameter (with error bit) if a parameter is inunassigned.
+ * F_parameter (with error bit) if a parameter is invalid.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
*/
}
#endif // _di_f_utf_character_is_alphabetic_digit_
+#ifndef _di_f_utf_character_is_alphabetic_numeric_
+ f_status_t f_utf_character_is_alphabetic_numeric(const f_utf_char_t sequence) {
+
+ if (macro_f_utf_char_t_width_is(sequence)) {
+ if (macro_f_utf_char_t_width_is(sequence) == 1) {
+ return F_status_set_error(F_utf_fragment);
+ }
+
+ return private_f_utf_character_is_alphabetic_numeric(sequence);
+ }
+
+ if (isalnum(macro_f_utf_char_t_to_char_1(sequence))) {
+ return F_true;
+ }
+
+ return F_false;
+ }
+#endif // _di_f_utf_character_is_alphabetic_numeric_
+
#ifndef _di_f_utf_character_is_ascii_
f_status_t f_utf_character_is_ascii(const f_utf_char_t sequence) {
}
#endif // _di_f_utf_character_is_control_code_
-#ifndef _di_f_utf_character_is_control_picture_
- f_status_t character_is_control_format(const f_utf_char_t sequence) {
+#ifndef _di_f_utf_character_is_control_format_
+ f_status_t f_utf_character_is_control_format(const f_utf_char_t sequence) {
if (macro_f_utf_char_t_width_is(sequence)) {
if (macro_f_utf_char_t_width_is(sequence) == 1) {
return private_f_utf_character_is_control_format(sequence);
}
- // There are no control format characters in ASCII.
+ // There are no ASCII control formats.
return F_false;
}
#endif // _di_f_utf_character_is_control_format_
return private_f_utf_character_is_word(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || sequence == f_string_ascii_underscore_s.string[0]) {
+ if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0]) {
return F_true;
}
return private_f_utf_character_is_word_dash(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || sequence == f_string_ascii_underscore_s.string[0] || sequence == f_string_ascii_minus_s.string[0]) {
+ if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_minus_s.string[0]) {
return F_true;
}
return private_f_utf_character_is_word_dash_plus(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || sequence == f_string_ascii_underscore_s.string[0] || sequence == f_string_ascii_minus_s.string[0] || sequence == f_string_ascii_plus_s.string[0]) {
+ if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_minus_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_plus_s.string[0]) {
return F_true;
}
* @return
* F_true if a UTF-8 character.
* F_false if not a UTF-8 character.
+ *
* F_utf_fragment if this is a UTF-8 character fragment.
*
* @see f_utf_character_is_valid()
* The (UTF-8) character.
*
* @return
- * F_none on success.
+ * F_true if a UTF-8 wide character.
+ * F_false if not a UTF-8 wide character.
*
- * F_failure (with error bit) if width is not long enough to convert.
- * F_parameter (with error bit) if a parameter is invalid.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
*/