extern "C" {
#endif
-#ifndef _di_f_utf_is_big_endian_
- f_return_status f_utf_is_big_endian() {
- uint16_t test_int = (0x01 << 8) | 0x02;
- int8_t test_char[2] = {0x01, 0x02};
+#ifndef _di_f_utf_character_is_
+ f_return_status f_utf_character_is(const f_utf_character character) {
+ unsigned short width = f_macro_utf_character_width_is(character);
- if (!memcmp(&test_int, test_char, 2)) {
- return f_true;
+ if (width == 0) {
+ return f_false;
}
- return f_false;
+ if (width == 1) { // a lone continuation byte is not a valid character.
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
+ }
+
+ return f_true;
}
-#endif // _di_f_utf_is_big_endian_
+#endif // _di_f_utf_character_is_
-#ifndef _di_f_utf_is_
- f_return_status f_utf_is(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+#ifndef _di_f_utf_character_is_bom_
+ f_return_status f_utf_character_is_bom(const f_utf_character character) {
+ if (character == f_utf_character_mask_bom) {
+ return f_true;
+ }
- unsigned short width = f_macro_utf_byte_width_is(*character);
+ unsigned short width = f_macro_utf_character_width_is(character);
- if (width == 0) {
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ if (width == 1) { // a lone continuation byte is not a valid character.
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
}
- return f_true;
+ return f_false;
}
-#endif // _di_f_utf_is_
-
-#ifndef _di_f_utf_is_bom_
- f_return_status f_utf_is_bom(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+#endif // _di_f_utf_character_is_bom_
- unsigned short width = f_macro_utf_byte_width_is(*character);
+#ifndef _di_f_utf_character_is_control_
+ f_return_status f_utf_character_is_control(const f_utf_character character) {
+ unsigned short width = f_macro_utf_character_width_is(character);
if (width == 0) {
+ if (iscntrl((int) (character >> 24))) { // ASCII: the single byte is stored in the highest-order byte.
+ return f_true;
+ }
+
return f_false;
}
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+
+ if (width == 1) {
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
}
- if (width > max_width) {
- return f_status_set_error(f_maybe);
+ if (width == 2) {
+ // Latin-1 Supplement: U+0080 to U+009F.
+ if (character >= 0xc2800000 && character <= 0xc29f0000) {
+ return f_true;
+ }
+
+ return f_false;
}
if (width == 3) {
- if (!memcmp(character, f_utf_bom, width)) {
+ // @todo: these might not be "control characters" and instead be "marking characters" or "combining characters".
+ // Special: U+FFF9 to U+FFFB.
+ if (character >= 0xefbfb900 && character <= 0xefbfbb00) {
+ return f_true;
+ }
+
+ return f_false;
+ }
+
+ if (width == 4) {
+ // Tags: U+E0001 and U+E007F.
+ if (character == 0xf3a08081 || character == 0xf3a081bf) {
return f_true;
}
}
return f_false;
}
-#endif // _di_f_utf_is_bom_
+#endif // _di_f_utf_character_is_control_
-#ifndef _di_f_utf_is_character_
- f_return_status f_utf_is_character(const f_utf_character character) {
+#ifndef _di_f_utf_character_is_control_picture_
+ f_return_status f_utf_character_is_control_picture(const f_utf_character character) {
unsigned short width = f_macro_utf_character_width_is(character);
if (width == 0) {
+ // There are no control picture characters in ASCII.
return f_false;
}
- else if (width == 1) {
+
+ if (width == 1) {
- return f_status_is_error(f_invalid_utf);
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
}
- return f_true;
+ if (width != 3) {
+ return f_false;
+ }
+
+ // Control Pictures: U+2400 to U+2426.
+ if (character >= 0xe2908000 && character <= 0xe290a600) {
+ return f_true;
+ }
+
+ // Specials: U+FFFC to U+FFFD.
+ if (character == 0xefbfbc00 || character == 0xefbfbd00) {
+ return f_true;
+ }
+
+ return f_false;
}
-#endif // _di_f_utf_is_
+#endif // _di_f_utf_character_is_control_picture_
-#ifndef _di_f_utf_is_graph_
- f_return_status f_utf_is_graph(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+#ifndef _di_f_utf_character_is_fragment_
+ f_return_status f_utf_character_is_fragment(const f_utf_character character) {
+ unsigned short width = f_macro_utf_character_width_is(character);
- if (f_macro_utf_byte_width_is(*character) == 0) {
- if (isgraph(*character)) {
+ if (width == 1) return f_true;
+
+ return f_false;
+ }
+#endif // _di_f_utf_character_is_fragment_
+
+#ifndef _di_f_utf_character_is_graph_
+ f_return_status f_utf_character_is_graph(const f_utf_character character) {
+ unsigned short width = f_macro_utf_character_width_is(character);
+
+ if (width == 0) {
+ int8_t ascii = character >> 24;
+
+ if (isgraph(ascii)) {
return f_true;
}
return f_false;
}
- // For now, just assume that any non-whitespace, non-substitute UTF-8 character is a graph.
- f_status status = f_utf_is_space(character, max_width);
-
- if (f_status_is_error(status)) {
- return status;
+ if (width == 1) { // a lone continuation byte is not a valid character.
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
}
- else if (status == f_true) {
+
+ if (f_utf_character_is_control(character) == f_true) {
return f_false;
}
- if (f_utf_is_bom(character, max_width) == f_true) {
+ if (f_utf_character_is_whitespace(character) == f_true) {
return f_false;
}
+ // @todo: does this need to check combining and marking characters? or are those still considered graph characters?
+
return f_true;
}
-#endif // _di_f_utf_is_graph_
-
-#ifndef _di_f_utf_is_space_
- f_return_status f_utf_is_space(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+#endif // _di_f_utf_character_is_graph_
- unsigned short width = f_macro_utf_byte_width_is(*character);
+#ifndef _di_f_utf_character_is_valid_
+ f_return_status f_utf_character_is_valid(const f_utf_character character) {
+ unsigned short width = f_macro_utf_character_width_is(character);
- if (width == 0) {
- if (isspace(*character)) {
- return f_true;
- }
+ if (width == 0) return f_false;
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ if (width == 1) { // a lone continuation byte is not a valid character.
+ return f_status_set_error(f_invalid_utf); // set (not test) the error bit so callers can detect the failure.
}
- if (width > max_width) {
- return f_status_set_error(f_maybe);
- }
+ // @todo: check every single character that is not allowed but is represented in UTF-8 and return false.
if (width == 2) {
- if (!memcmp(character, f_utf_space_no_break, width)) {
- return f_true;
+ // Syriac: U+070E, U+074B, U+074C.
+ if (character == 0xdc8e0000 || character == 0xdd8b0000 || character == 0xdd8c0000) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_line_feed_reverse, width)) {
- return f_true;
+ // Thaana: U+07B2 to U+07BF.
+ if (character >= 0xdeb20000 && character <= 0xdebf0000) {
+ return f_false;
}
+ }
- if (!memcmp(character, f_utf_space_line_next, width)) {
+ if (width == 3) {
+ // consider all private use codes as valid, U+E000 to U+F8FF.
+ if (character >= 0xee808000 && character <= 0xefa3bf00) {
return f_true;
}
- if (!memcmp(character, f_utf_substitute_middle_dot, width)) {
- return f_true;
+ // Sinhala: U+0D97 to U+0D99.
+ if (character >= 0xe0b69700 && character <= 0xe0b69900) {
+ return f_false;
}
- return f_false;
- }
+ // Sinhala: U+0DC7 to U+0DC9.
+ if (character >= 0xe0b78700 && character <= 0xe0b78900) {
+ return f_false;
+ }
- if (width == 3) {
- if (!memcmp(character, f_utf_space_no_break_narrow, width)) {
- return f_true;
+ // Sinhala: U+0DCB to U+0DCE.
+ if (character >= 0xe0b78b00 && character <= 0xe0b78e00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en, width)) {
- return f_true;
+ // Sinhala: U+0DE0 to U+0DE5.
+ if (character >= 0xe0b7a000 && character <= 0xe0b7a500) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en_quad, width)) {
- return f_true;
+ // Sinhala: U+0DF5 to U+0DFF.
+ if (character >= 0xe0b7b500 && character <= 0xe0b7bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en_quad, width)) {
- return f_true;
+ // Sinhala: U+0D80, U+0D81, U+0D84.
+ if (character == 0xe0b68000 || character == 0xe0b68100 || character == 0xe0b68400) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em, width)) {
- return f_true;
+ // Sinhala: U+0DB2, U+0DBC, U+0DBE.
+ if (character == 0xe0b6b200 || character == 0xe0b6bc00 || character == 0xe0b6be00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_quad, width)) {
- return f_true;
+ // Sinhala: U+0DBF, U+0DD5, U+0DD7.
+ if (character == 0xe0b6bf00 || character == 0xe0b79500 || character == 0xe0b79700) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_three, width)) {
- return f_true;
+ // Sinhala: U+0DF0, U+0DF1.
+ if (character == 0xe0b7b000 || character == 0xe0b7b100) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_four, width)) {
- return f_true;
+ // Small Form Variants: U+FE6C to U+FE6F.
+ if (character >= 0xefb9ac00 && character <= 0xefb9af00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_six, width)) {
- return f_true;
+ // Small Form Variants: U+FE53, U+FE67.
+ if (character == 0xefb99300 || character == 0xefb9a700) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_figure, width)) {
- return f_true;
+ // Sundanese Supplement: U+1CC8 to U+1CCF.
+ if (character >= 0xe1b38800 && character <= 0xe1b38f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_punctuation, width)) {
- return f_true;
+ // Superscripts and Subscripts: U+2072, U+2073, U+208F.
+ if (character == 0xe281b200 || character == 0xe281b300 || character == 0xe2828f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_thin, width)) {
- return f_true;
+ // Superscripts and Subscripts: U+209D to U+209F.
+ if (character >= 0xe2829d00 && character <= 0xe2829f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_hair, width)) {
- return f_true;
+ // Supplemental Punctuation: U+2E45 to U+2E7F.
+ if (character >= 0xe2b98500 && character <= 0xe2b9bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_separator_line, width)) {
- return f_true;
+ // Syloti Nagri: U+A82C to U+A82F.
+ if (character >= 0xeaa0ac00 && character <= 0xeaa0af00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_separator_paragraph, width)) {
- return f_true;
+ // Tagalog: U+1715 to U+171f.
+ if (character >= 0xe19c9500 && character <= 0xe19c9f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_ogham, width)) {
- return f_true;
+ // Tagalog: U+170D
+ if (character == 0xe19c8d00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_ideographic, width)) {
- return f_true;
+ // Tagbanwa: U+1774 to U+177f.
+ if (character >= 0xe19db400 && character <= 0xe19dbf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_medium_mathematical, width)) {
- return f_true;
+ // Tagbanwa: U+176D, U+1771
+ if (character == 0xe19dad00 || character == 0xe19db100) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_symbol_blank, width)) {
- return f_true;
+ // Tai Le: U+196E (e1 a5 ae), U+196F (e1 a5 af).
+ if (character == 0xe1a5ae00 || character == 0xe1a5af00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_symbol_space, width)) {
- return f_true;
+ // Tai Lee: U+1975 to U+197F.
+ if (character >= 0xe1a5b500 && character <= 0xe1a5bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_open_box, width)) {
- return f_true;
+ // Tai Tham: U+1A7D to U+1A7E.
+ if (character >= 0xe1a9bd00 && character <= 0xe1a9be00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_open_box_shouldered, width)) {
- return f_true;
+ // Tai Tham: U+1A8A to U+1A8F.
+ if (character >= 0xe1aa8a00 && character <= 0xe1aa8f00) {
+ return f_false;
}
- return f_false;
- }
+ // Tai Tham: U+1A9A to U+1A9F.
+ if (character >= 0xe1aa9a00 && character <= 0xe1aa9f00) {
+ return f_false;
+ }
- return f_false;
- }
-#endif // _di_f_utf_is_space_
+ // Tai Tham: U+1AAE to U+1AAF.
+ if (character >= 0xe1aaae00 && character <= 0xe1aaaf00) {
+ return f_false;
+ }
-#ifndef _di_f_utf_is_substitute_
- f_return_status f_utf_is_substitute(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+ // Tai Tham: U+1A5F.
+ if (character == 0xe1a99f00) {
+ return f_false;
+ }
- unsigned short width = f_macro_utf_byte_width_is(*character);
+ // Tai Viet: U+AAC3 to U+AADA.
+ if (character >= 0xeaab8300 && character <= 0xeaab9a00) {
+ return f_false;
+ }
- if (width == 0) {
- // there is no substitute character in ASCII.
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
- }
+ // Tamil: U+0B80, U+0B81.
+ if (character == 0xe0ae8000 || character == 0xe0ae8100) {
+ return f_false;
+ }
- if (width > max_width) {
- return f_status_set_error(f_maybe);
- }
+ // Tamil: U+0B8B to U+0B8D.
+ if (character >= 0xe0ae8b00 && character <= 0xe0ae8d00) {
+ return f_false;
+ }
- if (width == 2) {
- if (!memcmp(character, f_utf_substitute_middle_dot, width)) {
- return f_true;
+ // Tamil: U+0B96 to U+0B98.
+ if (character >= 0xe0ae9600 && character <= 0xe0ae9800) {
+ return f_false;
}
- return f_false;
- }
+ // Tamil: U+0BA0 to U+0BA2.
+ if (character >= 0xe0aea000 && character <= 0xe0aea200) {
+ return f_false;
+ }
- if (width == 3) {
- if (!memcmp(character, f_utf_substitute_symbol_blank, width)) {
- return f_true;
+ // Tamil: U+0BA5 to U+0BA7.
+ if (character >= 0xe0aea500 && character <= 0xe0aea700) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_symbol_space, width)) {
- return f_true;
+ // Tamil: U+0BAB to U+0BAD.
+ if (character >= 0xe0aeab00 && character <= 0xe0aead00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_open_box, width)) {
- return f_true;
+ // Tamil: U+0BBA to U+0BBD.
+ if (character >= 0xe0aeba00 && character <= 0xe0aebd00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_substitute_open_box_shouldered, width)) {
- return f_true;
+ // Tamil: U+0BC3 to U+0BC5.
+ if (character >= 0xe0af8300 && character <= 0xe0af8500) {
+ return f_false;
}
- return f_false;
- }
+ // Tamil: U+0BCE, U+0BCF.
+ if (character == 0xe0af8e00 || character == 0xe0af8f00) {
+ return f_false;
+ }
- return f_false;
- }
-#endif // _di_f_utf_is_substitute_
+ // Tamil: U+0BD1 to U+0BD6.
+ if (character >= 0xe0af9100 && character <= 0xe0af9600) {
+ return f_false;
+ }
-#ifndef _di_f_utf_is_whitespace_
- f_return_status f_utf_is_whitespace(const f_string character, const unsigned short max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
+ // Tamil: U+0BD8 (e0 af 98) to U+0BE5 (e0 af a5).
+ if (character >= 0xe0af9800 && character <= 0xe0afa500) {
+ return f_false;
+ }
- unsigned short width = f_macro_utf_byte_width_is(*character);
+ // Tamil: U+0BFB to U+0BFF.
+ if (character >= 0xe0afbb00 && character <= 0xe0afbf00) {
+ return f_false;
+ }
- if (width == 0) {
- if (isspace(*character)) {
- return f_true;
+ // Tamil: U+0B84, U+0B91, U+0BC9.
+ if (character == 0xe0ae8400 || character == 0xe0ae9100 || character == 0xe0af8900) {
+ return f_false;
}
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
- }
+ // Telugu: U+0C3A to U+0C3C.
+ if (character >= 0xe0b0ba00 && character <= 0xe0b0bc00) {
+ return f_false;
+ }
- if (width > max_width) {
- return f_status_set_error(f_maybe);
- }
+ // Telugu: U+0C4E to U+0C54.
+ if (character >= 0xe0b18e00 && character <= 0xe0b19400) {
+ return f_false;
+ }
- if (width == 2) {
- if (!memcmp(character, f_utf_space_no_break, width)) {
- return f_true;
+ // Telugu: U+0C5B to U+0C5F.
+ if (character >= 0xe0b19b00 && character <= 0xe0b19f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_line_feed_reverse, width)) {
- return f_true;
+ // Telugu: U+0C64, U+0C65.
+ if (character == 0xe0b1a400 || character == 0xe0b1a500) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_line_next, width)) {
- return f_true;
+ // Telugu: U+0C70 to U+0C77.
+ if (character >= 0xe0b1b000 && character <= 0xe0b1b700) {
+ return f_false;
}
- return f_false;
- }
+ // Telugu: U+0C04, U+0C0D, U+0C29.
+ if (character == 0xe0b08400 || character == 0xe0b08d00 || character == 0xe0b0a900) {
+ return f_false;
+ }
- if (width == 3) {
- if (!memcmp(character, f_utf_space_no_break_narrow, width)) {
- return f_true;
+ // Telugu: U+0C45, U+0C49, U+0C57.
+ if (character == 0xe0b18500 || character == 0xe0b18900 || character == 0xe0b19700) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en, width)) {
- return f_true;
+ // Thai: U+0E5C to U+0E7F.
+ if (character >= 0xe0b99c00 && character <= 0xe0b9bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en_quad, width)) {
- return f_true;
+ // Thai: U+0E3B to U+0E3E.
+ if (character >= 0xe0b8bb00 && character <= 0xe0b8be00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_en_quad, width)) {
- return f_true;
+ // Thai: U+0E00.
+ if (character == 0xe0b88000) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em, width)) {
- return f_true;
+ // Tibetan: U+0FDB to U+0FFF.
+ if (character >= 0xe0bf9b00 && character <= 0xe0bfbf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_quad, width)) {
- return f_true;
+ // Tibetan: U+0F6D to U+0F70.
+ if (character >= 0xe0bdad00 && character <= 0xe0bdb000) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_three, width)) {
- return f_true;
+ // Tibetan: U+0F48, U+0F98, U+0FBD, U+0FCD (3-byte sequences are left-aligned, so the lowest byte is always 00).
+ if (character == 0xe0bd8800 || character == 0xe0be9800 || character == 0xe0bebd00 || character == 0xe0bf8d00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_four, width)) {
- return f_true;
+ // Tifinagh: U+2D68 to U+2D6E.
+ if (character >= 0xe2b5a800 && character <= 0xe2b5ae00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_em_per_six, width)) {
- return f_true;
+ // Tifinagh: U+2D71 to U+2D7E.
+ if (character >= 0xe2b5b100 && character <= 0xe2b5be00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_figure, width)) {
- return f_true;
+ // Unified Canadian Aboriginal Syllabics Extended: U+18F6 to U+18FF.
+ if (character >= 0xe1a3b600 && character <= 0xe1a3bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_punctuation, width)) {
- return f_true;
+ // Vai: U+A62C to U+A63F.
+ if (character >= 0xea98ac00 && character <= 0xea98bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_thin, width)) {
- return f_true;
+ // Vedic Extensions: U+1CF7 and U+1CFA to U+1CFF.
+ if (character == 0xe1b3b700 || character >= 0xe1b3ba00 && character <= 0xe1b3bf00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_hair, width)) {
- return f_true;
+ // Vertical Forms: U+FE10 to U+FE1F.
+ if (character >= 0xefb89000 && character <= 0xefb89f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_separator_line, width)) {
- return f_true;
+ // Yi Radicals: U+A4C7 to U+A4CF.
+ if (character >= 0xea938700 && character <= 0xea938f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_separator_paragraph, width)) {
- return f_true;
+ // Yi Syllables: U+A48D to U+A48F.
+ if (character >= 0xea928d00 && character <= 0xea928f00) {
+ return f_false;
}
- if (!memcmp(character, f_utf_space_ogham, width)) {
- return f_true;
+ // Specials: U+FFF0 to U+FFF8.
+ if (character >= 0xefbfb000 && character <= 0xefbfb800) {
+ return f_false;
+ }
+
+ // Specials: U+FFFE to U+FFFF.
+ if (character >= 0xefbfbe00 && character <= 0xefbfbf00) {
+ return f_false;
}
+ }
- if (!memcmp(character, f_utf_space_ideographic, width)) {
+ if (width == 4) {
+ // Consider all private use codes as valid, U+F0000 to U+FFFFF.
+ if (character >= 0xf3b08080 && character <= 0xf3bfbfbf) {
return f_true;
}
- if (!memcmp(character, f_utf_space_medium_mathematical, width)) {
+ // Consider all private use codes as valid, U+100000 to U+10FFFF.
+ if (character >= 0xf4808080 && character <= 0xf48fbfbf) {
return f_true;
}
- return f_false;
- }
+ // Sharada: U+111CE, U+111CF.
+ if (character == 0xf091878e || character == 0xf091878f) {
+ return f_false;
+ }
- return f_false;
- }
-#endif // _di_f_utf_is_whitespace_
+ // Shorthand Format Controls: U+1BCA4 to U+1BCAF.
+ if (character >= 0xf09bb2a4 && character <= 0xf09bb2af) {
+ return f_false;
+ }
-#ifndef _di_f_utf_is_bom_character_
- f_return_status f_utf_is_bom_character(const f_utf_character character) {
- if (character == f_utf_character_mask_bom) {
- return f_true;
- }
+ // Siddham: U+115DE to U+115FF.
+ if (character >= 0xf091979e && character <= 0xf09197bf) {
+ return f_false;
+ }
- return f_false;
- }
-#endif // _di_f_utf_is_bom_character_
+ // Siddham: U+115B6, U+115B7.
+ if (character == 0xf09196b6 || character == 0xf09196b7) {
+ return f_false;
+ }
-#ifndef _di_f_utf_is_graph_character_
- f_return_status f_utf_is_graph_character(const f_utf_character character) {
- // for now, just assume that any non-whitespace, non-substitute utf-8 character is a graph.
- f_status status = f_utf_is_space_character(character);
+ // Sinhala Archaic Numbers: U+111F5 to U+111FF.
+ if (character >= 0xf09187b5 && character <= 0xf09187bf) {
+ return f_false;
+ }
- if (f_status_is_error(status)) {
- return status;
- }
- else if (status == f_true) {
- return f_false;
- }
+ // Sinhala Archaic Numbers: U+111E0 (f0 91 87 a0).
+ if (character == 0xf09187a0) {
+ return f_false;
+ }
- if (f_utf_is_bom_character(character) == f_true) {
- return f_false;
- }
-
- return f_true;
- }
-#endif // _di_f_utf_is_graph_character_
-
-#ifndef _di_f_utf_is_space_character_
- f_return_status f_utf_is_space_character(const f_utf_character character) {
- unsigned short width = f_macro_utf_character_width_is(character);
-
- if (width == 0) {
- int8_t ascii = character >> 24;
-
- if (isspace(ascii)) {
- return f_true;
+ // Sora Sompeng: U+110E9 to U+110EF.
+ if (character >= 0xf09183a9 && character <= 0xf09183af) {
+ return f_false;
}
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_invalid_utf);
- }
-
- f_bool is_big_endian = f_utf_is_big_endian();
-
- if (width == 2) {
- uint16_t utf = 0;
- if (is_big_endian) {
- utf = (uint16_t) (character >> 16);
- }
- else {
- utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
+ // Sora Sompeng: U+110FA to U+110FF.
+ if (character >= 0xf09183ba && character <= 0xf09183bf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_no_break, width)) {
- return f_true;
+ // Supplemental Arrows-C: U+1F80C to U+1F80F.
+ if (character >= 0xf09fa08c && character <= 0xf09fa08f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_line_feed_reverse, width)) {
- return f_true;
+ // Supplemental Arrows-C: U+1F848 to U+1F84F.
+ if (character >= 0xf09fa188 && character <= 0xf09fa18f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_line_next, width)) {
- return f_true;
+ // Supplemental Arrows-C: U+1F85A to U+1F85F.
+ if (character >= 0xf09fa19a && character <= 0xf09fa19f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_substitute_middle_dot, width)) {
- return f_true;
+ // Supplemental Arrows-C: U+1F8AE to U+1F8FF.
+ if (character >= 0xf09fa2ae && character <= 0xf09fa3bf) {
+ return f_false;
}
- return f_false;
- }
-
- if (width == 3) {
- uint32_t utf = 0;
- if (is_big_endian) {
- utf = character;
- }
- else {
- utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
+ // Supplemental Symbols and Pictographs: U+1F900 to U+1F90F.
+ if (character >= 0xf09fa480 && character <= 0xf09fa48f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_no_break_narrow, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F928 to U+1F92F.
+ if (character >= 0xf09fa4a8 && character <= 0xf09fa4af) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_en, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F94C to U+1F94F.
+ if (character >= 0xf09fa58c && character <= 0xf09fa58f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_en_quad, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F960 to U+1F97F.
+ if (character >= 0xf09fa5a0 && character <= 0xf09fa5bf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_en_quad, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F992 to U+1F9BF.
+ if (character >= 0xf09fa692 && character <= 0xf09fa6bf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_em, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F9C1 to U+1F9FF.
+ if (character >= 0xf09fa781 && character <= 0xf09fa7bf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_em_quad, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F91F, U+1F931, U+1F932.
+ if (character == 0xf09fa49f || character == 0xf09fa4b1 || character == 0xf09fa4b2) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_em_per_three, width)) {
- return f_true;
+ // Supplemental Symbols and Pictographs: U+1F93F, U+1F95F.
+ if (character == 0xf09fa4bf || character == 0xf09fa59f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_em_per_four, width)) {
- return f_true;
+ // Sutton SignWriting: U+1DA8C to U+1DA9A.
+ if (character >= 0xf09daa8c && character <= 0xf09daa9a) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_em_per_six, width)) {
- return f_true;
+ // Tags: U+E0000, U+E0002 to U+E001F (U+E0020 to U+E007F are assigned tag characters and must remain valid).
+ if (character == 0xf3a08080 || (character >= 0xf3a08082 && character <= 0xf3a0809f)) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_figure, width)) {
- return f_true;
+ // Tai Xuan Jing Symbols: U+1D357 to U+1D35F.
+ if (character >= 0xf09d8d97 && character <= 0xf09d8d9f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_punctuation, width)) {
- return f_true;
+ // Takri: U+116B8 to U+116BF.
+ if (character >= 0xf0919ab8 && character <= 0xf0919abf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_thin, width)) {
- return f_true;
+ // Takri: U+116CA to U+116CF.
+ if (character >= 0xf0919b8a && character <= 0xf0919b8f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_hair, width)) {
- return f_true;
+ // Tangut: U+187ED to U+187FF.
+ if (character >= 0xf0989fad && character <= 0xf0989fbf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_separator_line, width)) {
- return f_true;
+ // Tangut Components: U+18AF3 to U+18AFF.
+ if (character >= 0xf098abb3 && character <= 0xf098abbf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_separator_paragraph, width)) {
- return f_true;
+ // Tirhuta: U+114C8 to U+114CF.
+ if (character >= 0xf0919388 && character <= 0xf091938f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_ogham, width)) {
- return f_true;
+ // Tirhuta: U+114DA to U+114DF.
+ if (character >= 0xf091939a && character <= 0xf091939f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_ideographic, width)) {
- return f_true;
+ // Transport and Map Symbols: U+1F6D3 to U+1F6DF.
+ if (character >= 0xf09f9b93 && character <= 0xf09f9b9f) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_space_medium_mathematical, width)) {
- return f_true;
+ // Transport and Map Symbols: U+1F6ED to U+1F6EF.
+ if (character >= 0xf09f9bad && character <= 0xf09f9baf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_substitute_symbol_blank, width)) {
- return f_true;
+ // Transport and Map Symbols: U+1F6F7 to U+1F6FF.
+ if (character >= 0xf09f9bb7 && character <= 0xf09f9bbf) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_substitute_symbol_space, width)) {
- return f_true;
+ // Ugaritic: U+1039E.
+ if (character == 0xf0908e9e) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_substitute_open_box, width)) {
- return f_true;
+ // Warang Citi: U+118F3 to U+118FE.
+ if (character >= 0xf091a3b3 && character <= 0xf091a3be) {
+ return f_false;
}
- if (!memcmp(&utf, f_utf_substitute_open_box_shouldered, width)) {
- return f_true;
+ // Unicode (and therefore UTF-8) does not support representing any character greater than this (U+10FFFF).
+ if (character > 0xf48fbfbf) {
+ return f_false;
}
-
- return f_false;
}
- return f_false;
+ return f_true;
}
-#endif // _di_f_utf_is_space_character_
+#endif // _di_f_utf_character_is_valid_
-#ifndef _di_f_utf_is_substitute_character_
- f_return_status f_utf_is_substitute_character(const f_utf_character character) {
+#ifndef _di_f_utf_character_is_whitespace_
+ // Reports whether the given f_utf_character is an ASCII or UTF-8 whitespace character.
+ f_return_status f_utf_character_is_whitespace(const f_utf_character character) {
 unsigned short width = f_macro_utf_character_width_is(character);
 if (width == 0) {
- // there is no substitute character in ASCII.
+ // A width of 0 is handled as ASCII: the byte is taken from the top 8 bits and classified by isspace().
+ int8_t ascii = character >> 24;
+
+ if (isspace(ascii)) {
+ return f_true;
+ }
+
 return f_false;
 }
- else if (width == 1) {
+
+ // A width of 1 is reported as an invalid UTF-8 character.
+ if (width == 1) {
- return f_status_is_error(f_invalid_utf);
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_invalid_utf as an error.
+ return f_status_set_error(f_invalid_utf);
 }
- f_bool is_big_endian = f_utf_is_big_endian();
+ // Latin-1 Supplement: U+00A0, U+00AD.
+ if (character == 0xc2a00000 || character == 0xc2ad0000) {
+ return f_true;
+ }
- if (width == 2) {
- uint16_t utf = 0;
- if (is_big_endian) {
- utf = (uint16_t) (character >> 16);
- }
- else {
- utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
- }
+ // Tags: U+E0020 (UTF-8 bytes F3 A0 80 A0; 0xf3a08080 would be U+E0000).
+ if (character == 0xf3a080a0) {
+ return f_true;
+ }
- if (!memcmp(&utf, f_utf_substitute_middle_dot, width)) {
- return f_true;
- }
+ return f_false;
+ }
+#endif // _di_f_utf_character_is_whitespace_
- return f_false;
+#ifndef _di_f_utf_character_to_char_
+ // Converts an f_utf_character into a byte string, writing the resulting width back through max_width.
+ f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, unsigned short *max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (utf_character == 0) return f_status_set_error(f_invalid_parameter);
+ if (max_width == 0 && *character != 0) return f_status_set_error(f_invalid_parameter);
+ if (max_width != 0 && *character == 0) return f_status_set_error(f_invalid_parameter);
+ if (max_width != 0 && *max_width > 4) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ f_status status = f_none;
+
+ unsigned short width = f_macro_utf_character_width_is(utf_character);
+
+ // NOTE(review): this branch is only taken when the max_width pointer itself is 0, yet the pointer is written through inside it and again after the branch; confirm the intended "allocate" contract.
+ if (max_width == 0) {
+ f_macro_string_new(status, *character, width);
+
+ if (f_status_is_error(status)) return status;
+
+ width = 1;
+ *max_width = 1;
+ }
+ else if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_invalid_utf as an error.
+ return f_status_set_error(f_invalid_utf);
+ }
+ else if (width > *max_width) {
+ return f_status_set_error(f_failure);
 }
- if (width == 3) {
+ *max_width = width;
+
+ // On big-endian systems the leading bytes of utf_character are copied directly, otherwise the bytes are re-packed first.
+ if (f_utf_is_big_endian()) {
+ memcpy(*character, &utf_character, sizeof(int8_t) * width);
+ }
+ else {
 uint32_t utf = 0;
- if (is_big_endian) {
- utf = character;
+
+ if (width == 1) {
+ utf = f_macro_utf_character_to_char_1(utf_character) << 24;
 }
- else {
- utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
+ else if (width == 2) {
+ utf = (f_macro_utf_character_to_char_2(utf_character) << 24) | (f_macro_utf_character_to_char_1(utf_character) << 16);
 }
-
- if (!memcmp(&utf, f_utf_substitute_symbol_blank, width)) {
- return f_true;
+ else if (width == 3) {
+ utf = (f_macro_utf_character_to_char_3(utf_character) << 24) | (f_macro_utf_character_to_char_2(utf_character) << 16) | (f_macro_utf_character_to_char_1(utf_character) << 8);
 }
-
- if (!memcmp(&utf, f_utf_substitute_symbol_space, width)) {
- return f_true;
+ else if (width == 4) {
+ utf = (f_macro_utf_character_to_char_4(utf_character) << 24) | (f_macro_utf_character_to_char_3(utf_character) << 16) | (f_macro_utf_character_to_char_2(utf_character) << 8) | f_macro_utf_character_to_char_1(utf_character);
 }
- if (!memcmp(&utf, f_utf_substitute_open_box, width)) {
- return f_true;
- }
+ memcpy(*character, &utf, sizeof(int8_t) * width);
+ }
- if (!memcmp(&utf, f_utf_substitute_open_box_shouldered, width)) {
- return f_true;
- }
+ return f_none;
+ }
+#endif // _di_f_utf_character_to_char_
- return f_false;
+#ifndef _di_f_utf_is_big_endian_
+ // Detects the byte order by comparing a 16-bit integer against the byte pair {0x01, 0x02}.
+ f_return_status f_utf_is_big_endian() {
+ uint16_t test_int = (0x01 << 8) | 0x02;
+ int8_t test_char[2] = {0x01, 0x02};
+
+ // The in-memory bytes of test_int equal {0x01, 0x02} only when the most significant byte is stored first.
+ if (!memcmp(&test_int, test_char, 2)) {
+ return f_true;
 }
 return f_false;
 }
-#endif // _di_f_utf_is_substitute_character_
+#endif // _di_f_utf_is_big_endian_
-#ifndef _di_f_utf_is_whitespace_character_
- f_return_status f_utf_is_whitespace_character(const f_utf_character character) {
- unsigned short width = f_macro_utf_character_width_is(character);
+#ifndef _di_f_utf_is_
+ // Reports whether the first byte of the given string begins a UTF-8 (non-ASCII) character.
+ f_return_status f_utf_is(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ unsigned short width = f_macro_utf_byte_width_is(*character);
 if (width == 0) {
- int8_t ascii = character >> 24;
+ return f_false;
+ }
- if (isspace(ascii)) {
- return f_true;
- }
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
+
+ return f_true;
+ }
+#endif // _di_f_utf_is_
+
+#ifndef _di_f_utf_is_bom_
+ // Reports whether the given string begins with the 3-byte UTF-8 BOM (f_utf_bom).
+ f_return_status f_utf_is_bom(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ unsigned short width = f_macro_utf_byte_width_is(*character);
+ if (width == 0) {
 return f_false;
 }
- else if (width == 1) {
- return f_status_is_error(f_invalid_utf);
- }
- f_bool is_big_endian = f_utf_is_big_endian();
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
- if (width == 2) {
- uint16_t utf = 0;
- if (is_big_endian) {
- utf = (uint16_t) (character >> 16);
- }
- else {
- utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
- }
+ // Not enough bytes are available to decide, so report f_maybe with the error bit.
+ if (width > max_width) {
+ return f_status_set_error(f_maybe);
+ }
- if (!memcmp(&utf, f_utf_space_no_break, width)) {
+ if (width == 3) {
+ if (!memcmp(character, f_utf_bom, width)) {
 return f_true;
 }
+ }
- if (!memcmp(&utf, f_utf_space_line_feed_reverse, width)) {
- return f_true;
- }
+ return f_false;
+ }
+#endif // _di_f_utf_is_bom_
+
+#ifndef _di_f_utf_is_control_
+ // Reports whether the given string begins with an ASCII or UTF-8 control character.
+ f_return_status f_utf_is_control(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
- if (!memcmp(&utf, f_utf_space_line_next, width)) {
+ unsigned short width = f_macro_utf_byte_width_is(*character);
+
+ // A width of 0 is handled as ASCII and classified by iscntrl().
+ if (width == 0) {
+ if (iscntrl(*character)) {
 return f_true;
 }
 return f_false;
 }
- if (width == 3) {
- uint32_t utf = 0;
- if (is_big_endian) {
- utf = character;
- }
- else {
- utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
- }
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
- if (!memcmp(&utf, f_utf_space_no_break_narrow, width)) {
- return f_true;
- }
+ f_utf_character character_utf = 0;
+ f_status status = 0;
- if (!memcmp(&utf, f_utf_space_en, width)) {
- return f_true;
- }
+ status = f_utf_char_to_character(character, max_width, &character_utf);
- if (!memcmp(&utf, f_utf_space_en_quad, width)) {
- return f_true;
- }
+ if (status != f_none) return status;
- if (!memcmp(&utf, f_utf_space_en_quad, width)) {
- return f_true;
- }
+ // Classify the converted character, not the original string pointer.
+ return f_utf_character_is_control(character_utf);
+ }
+#endif // _di_f_utf_is_control_
- if (!memcmp(&utf, f_utf_space_em, width)) {
- return f_true;
- }
+#ifndef _di_f_utf_is_control_picture_
+ // Reports whether the given string begins with a UTF-8 control picture character.
+ f_return_status f_utf_is_control_picture(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
- if (!memcmp(&utf, f_utf_space_em_quad, width)) {
- return f_true;
- }
+ unsigned short width = f_macro_utf_byte_width_is(*character);
+
+ // There are no ASCII control pictures.
+ if (width == 0) {
+ return f_false;
+ }
- if (!memcmp(&utf, f_utf_space_em_per_three, width)) {
- return f_true;
- }
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
- if (!memcmp(&utf, f_utf_space_em_per_four, width)) {
- return f_true;
- }
+ f_utf_character character_utf = 0;
+ f_status status = 0;
- if (!memcmp(&utf, f_utf_space_em_per_six, width)) {
- return f_true;
- }
+ status = f_utf_char_to_character(character, max_width, &character_utf);
- if (!memcmp(&utf, f_utf_space_figure, width)) {
- return f_true;
- }
+ if (status != f_none) return status;
- if (!memcmp(&utf, f_utf_space_punctuation, width)) {
- return f_true;
- }
+ // Classify the converted character, not the original string pointer.
+ return f_utf_character_is_control_picture(character_utf);
+ }
+#endif // _di_f_utf_is_control_picture_
- if (!memcmp(&utf, f_utf_space_thin, width)) {
- return f_true;
- }
+#ifndef _di_f_utf_is_graph_
+ // Reports whether the given string begins with an ASCII or UTF-8 printable (graph) character.
+ f_return_status f_utf_is_graph(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
- if (!memcmp(&utf, f_utf_space_hair, width)) {
+ unsigned short width = f_macro_utf_byte_width_is(*character);
+
+ // A width of 0 is handled as ASCII and classified by isgraph().
+ if (width == 0) {
+ if (isgraph(*character)) {
 return f_true;
 }
- if (!memcmp(&utf, f_utf_space_separator_line, width)) {
- return f_true;
- }
+ return f_false;
+ }
- if (!memcmp(&utf, f_utf_space_separator_paragraph, width)) {
- return f_true;
- }
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
- if (!memcmp(&utf, f_utf_space_ogham, width)) {
- return f_true;
- }
+ f_utf_character character_utf = 0;
+ f_status status = 0;
- if (!memcmp(&utf, f_utf_space_ideographic, width)) {
- return f_true;
- }
+ status = f_utf_char_to_character(character, max_width, &character_utf);
+
+ if (status != f_none) return status;
- if (!memcmp(&utf, f_utf_space_medium_mathematical, width)) {
+ // Classify the converted character, not the original string pointer.
+ return f_utf_character_is_graph(character_utf);
+ }
+#endif // _di_f_utf_is_graph_
+
+#ifndef _di_f_utf_is_whitespace_
+ // Reports whether the given string begins with an ASCII or UTF-8 whitespace character.
+ f_return_status f_utf_is_whitespace(const f_string character, const unsigned short max_width) {
+ #ifndef _di_level_0_parameter_checking_
+ if (max_width < 1) return f_status_set_error(f_invalid_parameter);
+ #endif // _di_level_0_parameter_checking_
+
+ unsigned short width = f_macro_utf_byte_width_is(*character);
+
+ // A width of 0 is handled as ASCII and classified by isspace().
+ if (width == 0) {
+ if (isspace(*character)) {
 return f_true;
 }
 return f_false;
 }
- return f_false;
+ if (width == 1) {
+ // f_status_is_error() only tests for the error bit; set it so the caller receives f_incomplete_utf as an error.
+ return f_status_set_error(f_incomplete_utf);
+ }
+
+ f_utf_character character_utf = 0;
+ f_status status = 0;
+
+ status = f_utf_char_to_character(character, max_width, &character_utf);
+
+ if (status != f_none) return status;
+
+ // Classify the converted character, not the original string pointer.
+ return f_utf_character_is_whitespace(character_utf);
 }
-#endif // _di_f_utf_is_whitespace_character_
+#endif // _di_f_utf_is_whitespace_
 #ifndef _di_f_utf_char_to_character_
- f_return_status f_utf_char_to_character(const f_string character, const unsigned short max_width, f_utf_character *utf_character) {
+ // Converts the character at the start of the given string into an f_utf_character, stored through character_utf.
+ f_return_status f_utf_char_to_character(const f_string character, const unsigned short max_width, f_utf_character *character_utf) {
 #ifndef _di_level_0_parameter_checking_
 if (max_width < 1) return f_status_set_error(f_invalid_parameter);
- if (utf_character == 0) return f_status_set_error(f_invalid_parameter);
+ if (character_utf == 0) return f_status_set_error(f_invalid_parameter);
 #endif // _di_level_0_parameter_checking_
 unsigned short width = f_macro_utf_byte_width_is(*character);
 if (width == 0) {
- *utf_character = f_macro_utf_character_from_char_1(character[0]);
+ *character_utf = f_macro_utf_character_from_char_1(character[0]);
 return f_none;
 }
 else if (width == 1) {
 return f_status_set_error(f_failure);
 }
- *utf_character = 0;
- *utf_character |= f_macro_utf_character_to_char_1(character[0]);
+ // NOTE(review): bytes character[1] to character[3] are read below according to width alone; width is never compared against max_width here, confirm callers guarantee enough bytes.
+ // NOTE(review): the width 0 path uses the from_char_1 macro while the paths below use the to_char_N macros on single bytes; verify this is the intended macro family.
+ *character_utf = 0;
+ *character_utf |= f_macro_utf_character_to_char_1(character[0]);
 if (width < 2) {
 return f_none;
 }
- *utf_character |= f_macro_utf_character_to_char_2(character[1]);
+ *character_utf |= f_macro_utf_character_to_char_2(character[1]);
 if (width == 2) {
 return f_none;
 }
- *utf_character |= f_macro_utf_character_to_char_3(character[2]);
+ *character_utf |= f_macro_utf_character_to_char_3(character[2]);
 if (width == 3) {
 return f_none;
 }
- *utf_character |= f_macro_utf_character_to_char_4(character[3]);
+ *character_utf |= f_macro_utf_character_to_char_4(character[3]);
 return f_none;
 }
 #endif // _di_f_utf_char_to_character_
-#ifndef _di_f_utf_character_to_char_
- f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, unsigned short *max_width) {
- #ifndef _di_level_0_parameter_checking_
- if (utf_character == 0) return f_status_set_error(f_invalid_parameter);
- if (max_width == 0 && *character != 0) return f_status_set_error(f_invalid_parameter);
- if (max_width != 0 && *character == 0) return f_status_set_error(f_invalid_parameter);
- if (max_width != 0 && *max_width > 4) return f_status_set_error(f_invalid_parameter);
- #endif // _di_level_0_parameter_checking_
-
- f_status status = f_none;
-
- unsigned short width = f_macro_utf_character_width_is(utf_character);
-
- if (max_width == 0) {
- f_macro_string_new(status, *character, width);
-
- if (f_status_is_error(status)) return status;
-
- width = 1;
- *max_width = 1;
- }
- else if (width == 1) {
- return f_status_is_error(f_invalid_utf);
- }
- else if (width > *max_width) {
- return f_status_set_error(f_failure);
- }
-
- *max_width = width;
-
- if (f_utf_is_big_endian()) {
- memcpy(*character, &utf_character, sizeof(int8_t) * width);
- }
- else {
- uint32_t utf = 0;
-
- if (width == 1) {
- utf = f_macro_utf_character_to_char_1(utf_character) << 24;
- }
- else if (width == 2) {
- utf = (f_macro_utf_character_to_char_2(utf_character) << 24) | (f_macro_utf_character_to_char_1(utf_character) << 16);
- }
- else if (width == 3) {
- utf = (f_macro_utf_character_to_char_3(utf_character) << 24) | (f_macro_utf_character_to_char_2(utf_character) << 16) | (f_macro_utf_character_to_char_1(utf_character) << 8);
- }
- else if (width == 4) {
- utf = (f_macro_utf_character_to_char_4(utf_character) << 24) | (f_macro_utf_character_to_char_3(utf_character) << 16) | (f_macro_utf_character_to_char_2(utf_character) << 8) | f_macro_utf_character_to_char_1(utf_character);
- }
-
- memcpy(*character, &utf, sizeof(int8_t) * width);
- }
-
- return f_none;
- }
-#endif // _di_f_utf_character_to_char_
-
#ifdef __cplusplus
} // extern "C"
#endif
*
* These are integers representing character codes that represent types of substitute spaces.
*
+ * Substitute codes are printable representations of the codes, not the codes themselves, so they should not be treated as the actual codes.
+ *
* This does not provide substitute whitespace codes for standard ascii whitespaces, such as '\t' or '\r'.
*/
#ifndef _di_f_utf_substitute_
#endif // _di_f_utf_substitute_
/**
- * Helper function for UTF-8 processing code to determine endianess of the system.
+ * Check to see if the entire byte block of the character is a UTF-8 character.
+ *
+ * This does not validate if the UTF-8 character is a valid UTF-8 character, for that use f_utf_character_is_valid().
*
+ * @param character
+ * The character to validate.
*
* @return
- * f_true if the system is big-endian.
- * f_false if the system is little-endian.
+ * f_true if a UTF-8 character.
+ * f_false if not a UTF-8 character.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see f_utf_character_is_valid()
*/
-#ifndef _di_f_utf_is_big_endian_
- extern f_return_status f_utf_is_big_endian();
-#endif // _di_f_utf_is_big_endian_
+#ifndef _di_f_utf_character_is_
+ extern f_return_status f_utf_character_is(const f_utf_character character);
+#endif // _di_f_utf_character_is_
/**
- * Check to see if the entire byte block of the character is a UTF-8 character.
+ * Check to see if the entire byte block of the character is a UTF-8 BOM.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * f_true if a UTF-8 BOM.
+ * f_false if not a UTF-8 BOM.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ */
+#ifndef _di_f_utf_character_is_bom_
+ extern f_return_status f_utf_character_is_bom(const f_utf_character character);
+#endif // _di_f_utf_character_is_bom_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
+ *
+ * The UTF-8 BOM is considered a control character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * f_true if a UTF-8 control character.
+ * f_false if not a UTF-8 control character.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see iscntrl()
+ */
+#ifndef _di_f_utf_character_is_control_
+ extern f_return_status f_utf_character_is_control(const f_utf_character character);
+#endif // _di_f_utf_character_is_control_
+
+/**
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
+ *
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * f_true if a UTF-8 control picture character.
+ * f_false if not a UTF-8 control picture character.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ */
+#ifndef _di_f_utf_character_is_control_picture_
+ extern f_return_status f_utf_character_is_control_picture(const f_utf_character character);
+#endif // _di_f_utf_character_is_control_picture_
+
+/**
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
+ *
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
+ *
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
*
* @param character
* The character to validate.
- * There must be enough space allocated to compare against, as limited by max_width.
- * @param max_width
- * The maximum width available for checking.
- * Can be anything greater than 0.
*
* @return
* f_true if a UTF-8 character.
* f_false if not a UTF-8 character.
- * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_valid()
*/
-#ifndef _di_f_utf_is_
- extern f_return_status f_utf_is(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_
+#ifndef _di_f_utf_character_is_fragment_
+ extern f_return_status f_utf_character_is_fragment(const f_utf_character character);
+#endif // _di_f_utf_character_is_fragment_
/**
- * Check to see if the entire byte block of the character is a UTF-8 BOM.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
*
* @param character
* The character to validate.
- * There must be enough space allocated to compare against, as limited by max_width.
- * @param max_width
- * The maximum width available for checking.
- * Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 whitespace or substitute.
- * f_false if not a UTF-8 whitespace or substitute.
- * f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough.
- * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_true if a UTF-8 graph.
+ * f_false if not a UTF-8 graph.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isgraph()
*/
-#ifndef _di_f_utf_is_bom_
- extern f_return_status f_utf_is_bom(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_bom_
+#ifndef _di_f_utf_character_is_graph_
+ extern f_return_status f_utf_character_is_graph(const f_utf_character character);
+#endif // _di_f_utf_character_is_graph_
/**
- * Check to see if the entire byte block of the character is a UTF-8 character.
+ * Check to see if the entire byte block of the character is a valid UTF-8 character.
+ *
+ * This does validate if the UTF-8 character is a valid UTF-8 character.
+ * To not do this, use f_utf_character_is().
+ *
+ * This function can be expensive due to how Unicode has invalid codes spread randomly through it.
+ * For simpler error checking, try f_utf_character_is_fragment(), to just check that the width is valid or not.
+ * (First characters should not have a width of 1, and all other characters should have a width of 1.)
*
* @param character
* The character to validate.
* @return
* f_true if a UTF-8 character.
* f_false if not a UTF-8 character.
- * f_invalid_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_fragment()
+ */
+#ifndef _di_f_utf_character_is_valid_
+ extern f_return_status f_utf_character_is_valid(const f_utf_character character);
+#endif // _di_f_utf_character_is_valid_
+
+/**
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space or control character.
+ *
+ * @param character
+ * The character to validate.
+ *
+ * @return
+ * f_true if a UTF-8 whitespace.
+ * f_false if not a UTF-8 whitespace.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ */
+#ifndef _di_f_utf_character_is_whitespace_
+ extern f_return_status f_utf_character_is_whitespace(const f_utf_character character);
+#endif // _di_f_utf_character_is_whitespace_
+
+/**
+ * Convert a specialized f_utf_character type to an int8_t, stored as a string (character buffer).
+ *
+ * This will also convert ASCII characters stored in the utf_character array.
+ *
+ * @param utf_character
+ * The UTF-8 character to convert from.
+ * @param character
+ * An int8_t representation of the UTF-8 character, stored as a string of width bytes.
+ * If max_width is 0, then this should not be allocated (set the pointer address to 0).
+ * @param max_width
+ * The number of bytes the generated character represents.
+ * If this is set to 0, then the character will be allocated and this will be set to the width of the utf_character.
+ * If this is set to some value greater than 0 (up to 4), then this represents the size of the character array (no allocations are performed).
+ * If this is greater than 0, and the utf_character width is larger than this size, then an error is returned.
+ *
+ * @return
+ * f_none if conversion was successful.
+ * f_failure (with error bit) if width is not long enough to convert.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
* f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_allocation_error (with error bit) on memory allocation error.
+ * f_failure (with error bit) if width is not long enough to convert.
*/
-#ifndef _di_f_utf_is_
- extern f_return_status f_utf_is_character(const f_utf_character character);
-#endif // _di_f_utf_is_
+#ifndef _di_f_utf_character_to_char_
+ extern f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, unsigned short *max_width);
+#endif // _di_f_utf_character_to_char_
+
+/**
+ * Helper function for UTF-8 processing code to determine endianness of the system.
+ *
+ * @todo relocate this outside of f_utf into a more general path, perhaps f_memory (f_memory_is_big_endian).
+ *
+ * @return
+ * f_true if the system is big-endian.
+ * f_false if the system is little-endian.
+ */
+#ifndef _di_f_utf_is_big_endian_
+ extern f_return_status f_utf_is_big_endian();
+#endif // _di_f_utf_is_big_endian_
/**
- * Check to see if the entire byte block of the character is a UTF-8 printable character.
+ * Check to see if the entire byte block of the character is a UTF-8 character.
*
- * This does not check non-UTF-8 graph.
+ * This does not check the validity of the character, for that instead use f_utf_is_valid().
*
* @param character
* The character to validate.
* Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 graph.
- * f_false if not a UTF-8 graph.
- * f_maybe (with error bit) if this could be a graph but width is not long enough.
+ * f_true if a UTF-8 character.
+ * f_false if not a UTF-8 character.
* f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see f_utf_is_valid()
*/
-#ifndef _di_f_utf_is_graph_
- extern f_return_status f_utf_is_graph(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_graph_
+#ifndef _di_f_utf_is_
+ extern f_return_status f_utf_is(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_
/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace or substitute character.
- *
- * This does not check non-UTF-8 whitespace.
+ * Check to see if the entire byte block of the character is a UTF-8 BOM.
*
* @param character
* The character to validate.
* f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*/
-#ifndef _di_f_utf_is_space_
- extern f_return_status f_utf_is_space(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_space_
+#ifndef _di_f_utf_is_bom_
+ extern f_return_status f_utf_is_bom(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_bom_
/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace substitute character.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 control character.
*
- * This does not check non-UTF-8 whitespace.
+ * The UTF-8 BOM is considered a control character.
*
* @param character
* The character to validate.
* Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 substitute.
- * f_false if not a UTF-8 substitute.
- * f_maybe (with error bit) if this could be a substitute but width is not long enough.
+ * f_true if a UTF-8 control character.
+ * f_false if not a UTF-8 control character.
* f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see iscntrl()
*/
-#ifndef _di_f_utf_is_substitute_
- extern f_return_status f_utf_is_substitute(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_substitute_
+#ifndef _di_f_utf_is_control_
+ extern f_return_status f_utf_is_control(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_control_
/**
- * Check to see if the entire byte block of the character is a UTF-8 general whitespace character.
+ * Check to see if the entire byte block of the character is a UTF-8 control picture character.
*
- * This does not check non-UTF-8 whitespace.
+ * Control Picture characters are placeholders for special ASCII characters and therefore there are no ASCII Control Picture characters.
*
* @param character
* The character to validate.
* Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 whitespace.
- * f_false if not a UTF-8 whitespace.
- * f_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ * f_true if a UTF-8 control picture character.
+ * f_false if not a UTF-8 control picture character.
* f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
*/
-#ifndef _di_f_utf_is_whitespace_
- extern f_return_status f_utf_is_whitespace(const f_string character, const unsigned short max_width);
-#endif // _di_f_utf_is_whitespace_
+#ifndef _di_f_utf_is_control_picture_
+ extern f_return_status f_utf_is_control_picture(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_control_picture_
/**
- * Check to see if the entire byte block of the character is a UTF-8 BOM.
- *
- * @param character
- * The UTF-8 character to validate.
+ * Check to see if the entire byte block of the character is a 1-width UTF-8 character fragment.
*
- * @return
- * f_true if a UTF-8 whitespace or substitute.
- * f_false if not a UTF-8 whitespace or substitute.
- * f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
- */
-#ifndef _di_f_utf_is_bom_character_
- extern f_return_status f_utf_is_bom_character(const f_utf_character character);
-#endif // _di_f_utf_is_bom_character_
-
-/**
- * Check to see if the entire byte block of the character is a UTF-8 printable character.
+ * Characters whose width is 1-byte are invalid.
+ * However, the character could have been cut-off, so whether or not this is actually valid should be determined by the caller.
*
- * This does not check non-UTF-8 graph.
+ * For normal validation functions, try using f_utf_character_is() or f_utf_character_is_valid().
*
* @param character
* The character to validate.
+ * There must be enough space allocated to compare against, as limited by max_width.
+ * @param max_width
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 graph.
- * f_false if not a UTF-8 graph.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_true if a UTF-8 character.
+ * f_false if not a UTF-8 character.
+ *
+ * @see f_utf_character_is()
+ * @see f_utf_character_is_valid()
*/
-#ifndef _di_f_utf_is_graph_character_
- extern f_return_status f_utf_is_graph_character(const f_utf_character character);
-#endif // _di_f_utf_is_graph_character_
+#ifndef _di_f_utf_is_fragment_
+ extern f_return_status f_utf_is_fragment(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_fragment_
/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace or substitute character.
- *
- * This does not check non-UTF-8 whitespace.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 printable character.
*
* @param character
* The character to validate.
+ * There must be enough space allocated to compare against, as limited by max_width.
+ * @param max_width
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 whitespace or substitute.
- * f_false if not a UTF-8 whitespace or substitute.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ * f_true if a UTF-8 graph.
+ * f_false if not a UTF-8 graph.
+ * f_maybe (with error bit) if this could be a graph but width is not long enough.
+ * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see isgraph()
+ * @see iscntrl()
*/
-#ifndef _di_f_utf_is_space_character_
- extern f_return_status f_utf_is_space_character(const f_utf_character character);
-#endif // _di_f_utf_is_space_character_
+#ifndef _di_f_utf_is_graph_
+ extern f_return_status f_utf_is_graph(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_graph_
/**
- * Check to see if the entire byte block of the character is a UTF-8 whitespace substitute character.
+ * Check to see if the entire byte block of the character is a UTF-8 character and if that character is a valid UTF-8.
*
- * This does not check non-UTF-8 whitespace.
+ * This does check the validity of the character, to not do this use f_utf_is().
+ *
+ * This function can be expensive due to how Unicode has invalid codes spread randomly through it.
+ * For simpler error checking, try f_utf_is_fragment(), to just check that the width is valid or not.
+ * (First characters should not have a width of 1, and all other characters should have a width of 1.)
*
* @param character
* The character to validate.
+ * There must be enough space allocated to compare against, as limited by max_width.
+ * @param max_width
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
*
* @return
- * f_true if a UTF-8 substitute.
- * f_false if not a UTF-8 substitute.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ * f_true if a valid UTF-8 character.
+ * f_false if not a valid UTF-8 character.
+ * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see f_utf_is()
+ * @see f_utf_is_fragment()
*/
-#ifndef _di_f_utf_is_substitute_character_
- extern f_return_status f_utf_is_substitute_character(const f_utf_character character);
-#endif // _di_f_utf_is_substitute_character_
+#ifndef _di_f_utf_is_valid_
+ extern f_return_status f_utf_is_valid(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_valid_
/**
- * Check to see if the entire byte block of the character is a UTF-8 general whitespace character.
- *
- * This does not check non-UTF-8 whitespace.
+ * Check to see if the entire byte block of the character is an ASCII or UTF-8 general space or control character.
*
* @param character
* The character to validate.
+ * There must be enough space allocated to compare against, as limited by max_width.
+ * @param max_width
+ * The maximum width available for checking.
+ * Can be anything greater than 0.
*
* @return
* f_true if a UTF-8 whitespace.
* f_false if not a UTF-8 whitespace.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ * f_maybe (with error bit) if this could be a whitespace but width is not long enough.
+ * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see isspace()
+ * @see iscntrl()
*/
-#ifndef _di_f_utf_is_whitespace_character_
- extern f_return_status f_utf_is_whitespace_character(const f_utf_character character);
-#endif // _di_f_utf_is_whitespace_character_
+#ifndef _di_f_utf_is_whitespace_
+ extern f_return_status f_utf_is_whitespace(const f_string character, const unsigned short max_width);
+#endif // _di_f_utf_is_whitespace_
/**
- * Convert a UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character type.
- *
- * This will also convert ASCII characters.
+ * Convert an ASCII or UTF-8 character, stored as a string (character buffer), to the specialized f_utf_character type.
*
* @param character
* The character string to be converted to the f_utf_character type.
* @param max_width
* The maximum width available for converting.
* Can be anything greater than 0.
- * @param utf_character
+ * @param character_utf
* The generated character of type f_utf_character.
* This value may be cleared, even on error.
*
* f_invalid_parameter (with error bit) if a parameter is invalid.
*/
#ifndef _di_f_utf_char_to_character_
- extern f_return_status f_utf_char_to_character(const f_string character, const unsigned short max_width, f_utf_character *utf_character);
+ extern f_return_status f_utf_char_to_character(const f_string character, const unsigned short max_width, f_utf_character *character_utf);
#endif // _di_f_utf_char_to_character_
-/**
- * Convert a specialized f_utf_character type to a int8_t, stored as a string (character buffer).
- *
- * This will also convert ASCII characters stored in the utf_character array.
- *
- * @param utf_character
- * The UTF-8 characterr to convert from.
- * @param character
- * A int8_t representation of the UTF-8 character, stored as a string of width bytes.
- * If max_width is 0, then this should not be allocated (set the pointer address to 0).
- * @param max_width
- * The number of bytes the generated character represents.
- * If this is set to 0, then the character will be allocated and this will be set to the width of the utf_character.
- * If this is set to some value greater than 0 (up to 4), then this represents the size of the character array (no allocations are performed).
- * If this is greater than 0, and the utf_character width is larger than this size, then an error is returned.
- *
- * @return
- * f_none if conversion was successful.
- * f_failure (with error bit) if width is not long enough to convert.
- * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
- * f_invalid_parameter (with error bit) if a parameter is invalid.
- * f_allocation_error (with error bit) on memory allocation error.
- * f_failure (with error bit) if width is not long enough to convert.
- */
-#ifndef _di_f_utf_character_to_char_
- extern f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, unsigned short *max_width);
-#endif // _di_f_utf_character_to_char_
-
#ifdef __cplusplus
} // extern "C"
#endif