Unicode appears to define "Control Format" characters in addition to "Control Code" characters.
Create functions for "Control Code" and "Control Format" (is_control_code and is_control_format functions).
The is_control functions now check for both.
return F_false;
}
+ // is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(character, width)) {
return F_false;
}
return F_false;
}
+ // is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(character, width)) {
return F_false;
}
return F_false;
}
+ // is_control() handles both is_control_code() and is_control_format().
if (private_f_utf_character_is_control(character, width)) {
return F_false;
}
if (width == 2) {
+ // Control Codes.
+
// Latin-1 Supplement: U+0080 to U+009F.
if (character >= 0xc2800000 && character <= 0xc29f0000) {
return F_true;
}
+
+ // Control Formats.
+
+ // Latin-1 Supplement: U+00AD.
+ if (character == 0xc2ad0000) {
+ return F_true;
+ }
+
+ // Arabic: U+0600 to U+0605.
+ if (character >= 0xd8800000 && character <= 0xd8850000) {
+ return F_true;
+ }
+
+ // Arabic: U+061C, U+06DD.
+ if (character == 0xd89c0000 || character == 0xdb9d0000) {
+ return F_true;
+ }
+
+ // Syriac: U+070F.
+ if (character == 0xdc8f0000) {
+ return F_true;
+ }
}
else if (width == 3) {
- // General Punctuation: U+200E and U+200F.
- if (character == 0xe2808e00 || character == 0xe2808f00) {
+ // Control Formats.
+
+ // Arabic Extended-A: U+08E2.
+ if (character == 0xe0a3a200) {
return F_true;
}
- // General Punctuation: U+2066 to U+2069.
- if (character >= 0xe281a600 && character <= 0xe281a900) {
+ // Mongolian: U+180E.
+ if (character == 0xe1a08e00) {
+ return F_true;
+ }
+
+ // General Punctuation: U+200B to U+200F.
+ if (character >= 0xe2808b00 && character <= 0xe2808f00) {
+ return F_true;
+ }
+
+ // General Punctuation: U+202A to U+202E.
+ if (character >= 0xe280aa00 && character <= 0xe280ae00) {
+ return F_true;
+ }
+
+ // General Punctuation: U+2060 to U+2064.
+ if (character >= 0xe281a000 && character <= 0xe281a400) {
+ return F_true;
+ }
+
+ // General Punctuation: U+2066 to U+206F.
+ if (character >= 0xe281a600 && character <= 0xe281af00) {
+ return F_true;
+ }
+
+ // Arabic Presentation Forms-B: U+FEFF.
+ if (character == 0xefbbbf00) {
return F_true;
}
- // Special: U+FFF9 to U+FFFB.
+ // Specials: U+FFF9 to U+FFFB.
if (character >= 0xefbfb900 && character <= 0xefbfbb00) {
return F_true;
}
}
else if (width == 4) {
- // Tags: U+E0001 and U+E007F.
- if (character == 0xf3a08081 || character == 0xf3a081bf) {
+ // Control Formats.
+
+ // Kaithi: U+110BD, U+110CD.
+ if (character == 0xf09182bd || character == 0xf091838d) {
+ return F_true;
+ }
+
+ // Egyptian Hieroglyphics: U+13430 to U+13438.
+ if (character >= 0xf09390b0 && character <= 0xf09390b8) {
+ return F_true;
+ }
+
+ // Shorthand Format Controls: U+1BCA0 to U+1BCA3.
+ if (character >= 0xf09bb2a0 && character <= 0xf09bb2a3) {
+ return F_true;
+ }
+
+ // Music Symbols: U+1D173 to U+1D17A.
+ if (character >= 0xf09d85b3 && character <= 0xf09d85ba) {
+ return F_true;
+ }
+
+ // Tags: U+E0001.
+ if (character == 0xf3a08081) {
+ return F_true;
+ }
+
+ // Tags: U+E0020 to U+E007F.
+ if (character >= 0xf3a080a0 && character <= 0xf3a081bf) {
return F_true;
}
}
}
#endif // !defined(_di_f_utf_character_is_control_) || !defined(_di_f_utf_is_control_)
+#if !defined(_di_f_utf_character_is_control_code_) || !defined(_di_f_utf_is_control_code_)
+  // Decide whether a multi-byte UTF-8 character is a control code.
+  //
+  // The character is compared against constants whose UTF-8 bytes occupy the most significant bytes of the value.
+  // Returns F_true for a control code and F_false for anything else.
+  f_status_t private_f_utf_character_is_control_code(const f_utf_character_t character, const uint8_t width) {
+
+    // Every control code checked here is a two-byte sequence.
+    if (width != 2) {
+      return F_false;
+    }
+
+    // Latin-1 Supplement: U+0080 to U+009F.
+    if (character < 0xc2800000 || character > 0xc29f0000) {
+      return F_false;
+    }
+
+    return F_true;
+  }
+#endif // !defined(_di_f_utf_character_is_control_code_) || !defined(_di_f_utf_is_control_code_)
+
+#if !defined(_di_f_utf_character_is_control_format_) || !defined(_di_f_utf_is_control_format_)
+  // Decide whether a multi-byte UTF-8 character is a control format character.
+  //
+  // The character is compared against constants whose UTF-8 bytes occupy the most significant bytes of the value.
+  // Returns F_true for a control format character and F_false for anything else, including unhandled widths.
+  f_status_t private_f_utf_character_is_control_format(const f_utf_character_t character, const uint8_t width) {
+
+    switch (width) {
+
+      case 2:
+
+        // Latin-1 Supplement: U+00AD.
+        // Arabic: U+061C, U+06DD.
+        // Syriac: U+070F.
+        if (character == 0xc2ad0000 || character == 0xd89c0000 || character == 0xdb9d0000 || character == 0xdc8f0000) {
+          return F_true;
+        }
+
+        // Arabic: U+0600 to U+0605.
+        if (character >= 0xd8800000 && character <= 0xd8850000) {
+          return F_true;
+        }
+
+        break;
+
+      case 3:
+
+        // Arabic Extended-A: U+08E2.
+        // Mongolian: U+180E.
+        // Arabic Presentation Forms-B: U+FEFF.
+        if (character == 0xe0a3a200 || character == 0xe1a08e00 || character == 0xefbbbf00) {
+          return F_true;
+        }
+
+        // General Punctuation: U+200B to U+200F and U+202A to U+202E.
+        if ((character >= 0xe2808b00 && character <= 0xe2808f00) || (character >= 0xe280aa00 && character <= 0xe280ae00)) {
+          return F_true;
+        }
+
+        // General Punctuation: U+2060 to U+2064 and U+2066 to U+206F.
+        if ((character >= 0xe281a000 && character <= 0xe281a400) || (character >= 0xe281a600 && character <= 0xe281af00)) {
+          return F_true;
+        }
+
+        // Specials: U+FFF9 to U+FFFB.
+        if (character >= 0xefbfb900 && character <= 0xefbfbb00) {
+          return F_true;
+        }
+
+        break;
+
+      case 4:
+
+        // Kaithi: U+110BD, U+110CD.
+        // Tags: U+E0001.
+        if (character == 0xf09182bd || character == 0xf091838d || character == 0xf3a08081) {
+          return F_true;
+        }
+
+        // Egyptian Hieroglyphics: U+13430 to U+13438.
+        if (character >= 0xf09390b0 && character <= 0xf09390b8) {
+          return F_true;
+        }
+
+        // Shorthand Format Controls: U+1BCA0 to U+1BCA3.
+        if (character >= 0xf09bb2a0 && character <= 0xf09bb2a3) {
+          return F_true;
+        }
+
+        // Music Symbols: U+1D173 to U+1D17A.
+        if (character >= 0xf09d85b3 && character <= 0xf09d85ba) {
+          return F_true;
+        }
+
+        // Tags: U+E0020 to U+E007F.
+        if (character >= 0xf3a080a0 && character <= 0xf3a081bf) {
+          return F_true;
+        }
+
+        break;
+
+      default:
+        break;
+    }
+
+    return F_false;
+  }
+#endif // !defined(_di_f_utf_character_is_control_format_) || !defined(_di_f_utf_is_control_format_)
+
#if !defined(_di_f_utf_character_is_control_picture_) || !defined(_di_f_utf_is_control_picture_)
f_status_t private_f_utf_character_is_control_picture(const f_utf_character_t character, const uint8_t width) {
* The number of bytes repesenting the character width.
*
* @return
- * F_true if a UTF-8 control picture character.
- * F_false if not a UTF-8 control picture character.
+ * F_true if a UTF-8 combining character.
+ * F_false if not a UTF-8 combining character.
*
* F_utf (with error bit) if character is an invalid UTF-8 character.
*
#endif // !defined(_di_f_utf_character_is_control_) || !defined(_di_f_utf_is_control_)
/**
+ * Private implementation of f_utf_character_is_control_code().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ * The character to validate.
+ * @param width
+ * The number of bytes representing the character width.
+ *
+ * @return
+ * F_true if a UTF-8 control code character.
+ * F_false if not a UTF-8 control code character.
+ *
+ * F_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see f_utf_character_is_control_code()
+ * @see f_utf_is_control_code()
+ */
+#if !defined(_di_f_utf_character_is_control_code_) || !defined(_di_f_utf_is_control_code_)
+ extern f_status_t private_f_utf_character_is_control_code(const f_utf_character_t character, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_control_code_) || !defined(_di_f_utf_is_control_code_)
+
+/**
+ * Private implementation of f_utf_character_is_control_format().
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param character
+ * The character to validate.
+ * @param width
+ * The number of bytes representing the character width.
+ *
+ * @return
+ * F_true if a UTF-8 control format character.
+ * F_false if not a UTF-8 control format character.
+ *
+ * F_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see f_utf_character_is_control_format()
+ * @see f_utf_is_control_format()
+ */
+#if !defined(_di_f_utf_character_is_control_format_) || !defined(_di_f_utf_is_control_format_)
+ extern f_status_t private_f_utf_character_is_control_format(const f_utf_character_t character, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // !defined(_di_f_utf_character_is_control_format_) || !defined(_di_f_utf_is_control_format_)
+
+/**
* Private implementation of f_utf_character_is_control_picture().
*
* Intended to be shared to each of the different implementation variations.
}
#endif // _di_f_utf_character_is_control_
+#ifndef _di_f_utf_character_is_control_code_
+  // Check whether the given character is an ASCII or UTF-8 control code character.
+  //
+  // Returns F_true or F_false, or F_utf (with error bit) when the character width is 1.
+  f_status_t f_utf_character_is_control_code(const f_utf_character_t character) {
+
+    const uint8_t width = macro_f_utf_character_t_width_is(character);
+
+    // A width of 0 means the character is ASCII, so defer to the standard library.
+    if (!width) {
+      if (iscntrl(macro_f_utf_character_t_to_char_1(character))) {
+        return F_true;
+      }
+
+      return F_false;
+    }
+
+    // A width of 1 is reported as an error: the error bit must be set on the status, not tested.
+    if (width == 1) {
+      return F_status_set_error(F_utf);
+    }
+
+    return private_f_utf_character_is_control_code(character, width);
+  }
+#endif // _di_f_utf_character_is_control_code_
+
+#ifndef _di_f_utf_character_is_control_format_
+  // Check whether the given character is a UTF-8 control format character.
+  //
+  // Returns F_true or F_false, or F_utf (with error bit) when the character width is 1.
+  f_status_t f_utf_character_is_control_format(const f_utf_character_t character) {
+
+    const uint8_t width = macro_f_utf_character_t_width_is(character);
+
+    if (!width) {
+
+      // There are no control format characters in ASCII.
+      return F_false;
+    }
+
+    // A width of 1 is reported as an error: the error bit must be set on the status, not tested.
+    if (width == 1) {
+      return F_status_set_error(F_utf);
+    }
+
+    return private_f_utf_character_is_control_format(character, width);
+  }
+#endif // _di_f_utf_character_is_control_format_
+
#ifndef _di_f_utf_character_is_control_picture_
f_status_t f_utf_character_is_control_picture(const f_utf_character_t character) {
}
#endif // _di_f_utf_is_control_
+#ifndef _di_f_utf_is_control_code_
+  // Check whether the byte sequence starting at character is an ASCII or UTF-8 control code character.
+  //
+  // Returns F_true or F_false.
+  // Returns F_parameter (with error bit) when width_max is less than 1 and parameter checking is enabled.
+  // Returns F_complete_not_utf (with error bit) when the first byte has a width of 1.
+  // Any error status from the byte-to-character conversion is returned unchanged.
+  f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    const uint8_t width = macro_f_utf_byte_width_is(*character);
+
+    // A width of 0 means the byte is ASCII, so defer to the standard library.
+    if (!width) {
+      if (iscntrl(*character)) {
+        return F_true;
+      }
+
+      return F_false;
+    }
+
+    // A width of 1 is reported as an error: the error bit must be set on the status, not tested.
+    if (width == 1) {
+      return F_status_set_error(F_complete_not_utf);
+    }
+
+    f_utf_character_t character_utf = 0;
+
+    {
+      const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+      if (F_status_is_error(status)) return status;
+    }
+
+    return private_f_utf_character_is_control_code(character_utf, width);
+  }
+#endif // _di_f_utf_is_control_code_
+
+#ifndef _di_f_utf_is_control_format_
+  // Check whether the byte sequence starting at character is a UTF-8 control format character.
+  //
+  // Returns F_true or F_false.
+  // Returns F_parameter (with error bit) when width_max is less than 1 and parameter checking is enabled.
+  // Returns F_complete_not_utf (with error bit) when the first byte has a width of 1.
+  // Any error status from the byte-to-character conversion is returned unchanged.
+  f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max) {
+    #ifndef _di_level_0_parameter_checking_
+      if (width_max < 1) return F_status_set_error(F_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    const uint8_t width = macro_f_utf_byte_width_is(*character);
+
+    // There are no ASCII control formats.
+    if (!width) {
+      return F_false;
+    }
+
+    // A width of 1 is reported as an error: the error bit must be set on the status, not tested.
+    if (width == 1) {
+      return F_status_set_error(F_complete_not_utf);
+    }
+
+    f_utf_character_t character_utf = 0;
+
+    {
+      const f_status_t status = private_f_utf_char_to_character(character, width_max, &character_utf);
+      if (F_status_is_error(status)) return status;
+    }
+
+    return private_f_utf_character_is_control_format(character_utf, width);
+  }
+#endif // _di_f_utf_is_control_format_
+
#ifndef _di_f_utf_is_control_picture_
f_status_t f_utf_is_control_picture(const f_string_t character, const f_array_length_t width_max) {
#ifndef _di_level_0_parameter_checking_
return F_status_is_error(F_complete_not_utf);
}
+ if (width != 3) {
+ return F_false;
+ }
+
f_utf_character_t character_utf = 0;
{
* The character to validate.
*
* @return
- * F_true if a UTF-8 control picture character.
- * F_false if not a UTF-8 control picture character.
+ * F_true if a UTF-8 combining character.
+ * F_false if not a UTF-8 combining character.
*
* F_utf (with error bit) if character is an invalid UTF-8 character.
*/
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
*
+ * This includes control code and control format characters.
+ *
* @param character
* The character to validate.
*
#endif // _di_f_utf_character_is_control_
/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * ASCII characters are classified using iscntrl().
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control code character.
+ * F_false if not a UTF-8 control code character.
+ *
+ * F_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_code_
+ extern f_status_t f_utf_character_is_control_code(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * There are no control format characters in ASCII, so an ASCII character always results in F_false.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * F_true if a UTF-8 control format character.
+ * F_false if not a UTF-8 control format character.
+ *
+ * F_utf (with error bit) if character is an invalid UTF-8 character.
+ */
+#ifndef _di_f_utf_character_is_control_format_
+ extern f_status_t f_utf_character_is_control_format(const f_utf_character_t character);
+#endif // _di_f_utf_character_is_control_format_
+
+/**
* Check to see if the entire byte block of the character is a UTF-8 control picture character.
*
* Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
* Can be anything greater than 0.
*
* @return
- * F_true if a UTF-8 control picture character.
- * F_false if not a UTF-8 control picture character.
+ * F_true if a UTF-8 combining character.
+ * F_false if not a UTF-8 combining character.
*
* F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
*/
/**
* Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
*
+ * This includes control code and control format characters.
+ *
* @param character
* The character to validate.
* There must be enough space allocated to compare against, as limited by width_max.
#endif // _di_f_utf_is_control_
/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control code character.
+ *
+ * Control Code characters are the traditional control characters, such as "\n" as well as some newer Unicode ones.
+ *
+ * ASCII characters are classified using iscntrl().
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control code character.
+ * F_false if not a UTF-8 control code character.
+ *
+ * F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ * F_parameter (with error bit) if width_max is less than 1 (only when parameter checking is enabled).
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_is_control_code_
+ extern f_status_t f_utf_is_control_code(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_code_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control format character.
+ *
+ * Control Format characters are special characters used for formatting.
+ * These are considered control characters.
+ *
+ * There are no control format characters in ASCII, so an ASCII character always results in F_false.
+ *
+ * @param character
+ * The character to validate.
+ * There must be enough space allocated to compare against, as limited by width_max.
+ * @param width_max
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
+ *
+ * @return
+ * F_true if a UTF-8 control format character.
+ * F_false if not a UTF-8 control format character.
+ *
+ * F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ * F_parameter (with error bit) if width_max is less than 1 (only when parameter checking is enabled).
+ */
+#ifndef _di_f_utf_is_control_format_
+ extern f_status_t f_utf_is_control_format(const f_string_t character, const f_array_length_t width_max);
+#endif // _di_f_utf_is_control_format_
+
+/**
* Check to see if the entire byte block of the character is a UTF-8 control picture character.
*
* Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.