Expand the UTF-8 character type (a 4-byte wide character represented as a big-endian 32-bit integer) into working like f_string and f_dynamic_string.
Provide all similar functionality.
I have decided that the isgraph(), isspace(), etc. functions for UTF-8 should also call the ASCII equivalents.
Update all relating code.
Use memcmp() and memcpy() for comparing the UTF-8 character type (4-byte integer) to the UTF-8 char strings (multiple 1-byte char).
When doing this, make sure to do so with the proper endianness.
Add missing f_utf_character_to_char() function.
Wrap some of the macro parameters in parentheses for safety reasons.
Add f_utf_is_big_endian() and document its use.
Provide custom EOL, EOS, and placeholder defines for UTF characters (4-byte integers).
build_linker ar
build_libraries -lc
build_libraries_fll -lfll_0
-build_sources_library level_1/colors.c level_1/console.c level_1/directory.c level_1/status.c level_1/file.c level_1/fss.c level_1/fss_basic.c level_1/fss_basic_list.c level_1/fss_extended.c level_1/program.c level_1/serialized.c level_1/strings.c
+build_sources_library level_1/colors.c level_1/console.c level_1/directory.c level_1/status.c level_1/file.c level_1/fss.c level_1/fss_basic.c level_1/fss_basic_list.c level_1/fss_extended.c level_1/program.c level_1/serialized.c level_1/strings.c level_1/utf.c
build_sources_program
-build_sources_headers level_1/colors.h level_1/console.h level_1/directory.h level_1/status.h level_1/file.h level_1/fss.h level_1/fss_basic.h level_1/fss_basic_list.h level_1/fss_status.h level_1/fss_extended.h level_1/fss_macro.h level_1/program.h level_1/serialized.h level_1/strings.h
+build_sources_headers level_1/colors.h level_1/console.h level_1/directory.h level_1/status.h level_1/file.h level_1/fss.h level_1/fss_basic.h level_1/fss_basic_list.h level_1/fss_status.h level_1/fss_extended.h level_1/fss_macro.h level_1/program.h level_1/serialized.h level_1/strings.h level_1/utf.h
build_shared yes
build_static yes
build_linker ar
build_libraries -lc
build_libraries_fll
-build_sources_library level_0/console.c level_0/conversion.c level_0/file.c level_0/memory.c level_0/pipe.c level_0/print.c level_0/utf.c level_1/colors.c level_1/console.c level_1/directory.c level_1/status.c level_1/file.c level_1/fss.c level_1/fss_basic.c level_1/fss_basic_list.c level_1/fss_extended.c level_1/program.c level_1/serialized.c level_1/strings.c level_2/colors.c level_2/execute.c level_2/status.c
+build_sources_library level_0/console.c level_0/conversion.c level_0/file.c level_0/memory.c level_0/pipe.c level_0/print.c level_0/utf.c level_1/colors.c level_1/console.c level_1/directory.c level_1/status.c level_1/file.c level_1/fss.c level_1/fss_basic.c level_1/fss_basic_list.c level_1/fss_extended.c level_1/program.c level_1/serialized.c level_1/strings.c level_1/utf.c level_2/colors.c level_2/execute.c level_2/status.c
build_sources_program
-build_sources_headers level_0/colors.h level_0/console.h level_0/conversion.h level_0/status.h level_0/file.h level_0/fss.h level_0/memory.h level_0/fll_paths.h level_0/filesystem_paths.h level_0/pipe.h level_0/print.h level_0/serialized.h level_0/strings.h level_0/types.h level_0/types_array.h level_0/utf.h level_1/colors.h level_1/console.h level_1/directory.h level_1/status.h level_1/file.h level_1/fss.h level_1/fss_basic.h level_1/fss_basic_list.h level_1/fss_status.h level_1/fss_extended.h level_1/fss_macro.h level_1/program.h level_1/serialized.h level_1/strings.h level_2/colors.h level_2/execute.h level_2/status.h level_2/fss_basic.h level_2/fss_basic_list.h level_2/fss_extended.h level_2/fss_status.h
+build_sources_headers level_0/colors.h level_0/console.h level_0/conversion.h level_0/status.h level_0/file.h level_0/fss.h level_0/memory.h level_0/fll_paths.h level_0/filesystem_paths.h level_0/pipe.h level_0/print.h level_0/serialized.h level_0/strings.h level_0/types.h level_0/types_array.h level_0/utf.h level_1/colors.h level_1/console.h level_1/directory.h level_1/status.h level_1/file.h level_1/fss.h level_1/fss_basic.h level_1/fss_basic_list.h level_1/fss_status.h level_1/fss_extended.h level_1/fss_macro.h level_1/program.h level_1/serialized.h level_1/strings.h level_1/utf.h level_2/colors.h level_2/execute.h level_2/status.h level_2/fss_basic.h level_2/fss_basic_list.h level_2/fss_extended.h level_2/fss_status.h
build_sources_bash
build_sources_settings
build_shared yes
extern "C" {
#endif
+#ifndef _di_f_utf_is_big_endian_
+  /**
+   * Report whether the host stores multi-byte integers most-significant byte first.
+   *
+   * A 16-bit integer holding 0x0102 is compared, byte for byte, against the sequence {0x01, 0x02}.
+   *
+   * @return
+   *   f_true when the in-memory layout matches that sequence (big-endian).
+   *   f_false otherwise.
+   */
+  f_return_status f_utf_is_big_endian() {
+    const uint16_t probe = 0x0102;
+    const char expected[2] = {0x01, 0x02};
+
+    return memcmp(&probe, expected, sizeof(expected)) == 0 ? f_true : f_false;
+  }
+#endif // _di_f_utf_is_big_endian_
+
#ifndef _di_f_utf_is_
f_return_status f_utf_is(const f_string character, const f_u_short max_width) {
#ifndef _di_level_0_parameter_checking_
return f_false;
}
else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ return f_status_is_error(f_invalid_utf);
}
return f_true;
if (max_width < 1) return f_status_set_error(f_invalid_parameter);
#endif // _di_level_0_parameter_checking_
- f_u_short width = f_macro_utf_byte_width_is(*character);
+ if (f_macro_utf_byte_width_is(*character) == 0) {
+ if (isgraph(*character)) {
+ return f_true;
+ }
- if (width == 0) {
return f_false;
}
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
- }
- // Do not operate on UTF-8 fragments that are not the first byte of the character.
- if (width == 1) {
- return f_status_set_error(f_incomplete_utf);
- }
+ // For now, just assume that any non-whitespace, non-substitute UTF-8 character is a graph.
+ f_status status = f_utf_is_space(character, max_width);
- if (width > max_width) {
- return f_status_set_error(f_maybe);
+ if (f_status_is_error(status)) {
+ return status;
}
-
- // for now, just assume that any non-whitespace, non-substitute utf-8 character is a graph.
-
- if (f_utf_is_space(character, max_width) == f_true) {
+ else if (status == f_true) {
return f_false;
}
f_u_short width = f_macro_utf_byte_width_is(*character);
if (width == 0) {
+ if (isspace(*character)) {
+ return f_true;
+ }
+
return f_false;
}
else if (width == 1) {
f_u_short width = f_macro_utf_byte_width_is(*character);
if (width == 0) {
+ // there is no substitute character in ASCII.
return f_false;
}
else if (width == 1) {
f_u_short width = f_macro_utf_byte_width_is(*character);
if (width == 0) {
+ if (isspace(*character)) {
+ return f_true;
+ }
+
return f_false;
}
else if (width == 1) {
#ifndef _di_f_utf_is_graph_character_
f_return_status f_utf_is_graph_character(const f_utf_character character) {
- f_u_short width = f_macro_utf_character_width_is(character);
-
- if (width == 0) {
- return f_false;
- }
- else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
- }
-
// for now, just assume that any non-whitespace, non-substitute utf-8 character is a graph.
+ f_status status = f_utf_is_space_character(character);
- if (f_utf_is_space_character(character) == f_true) {
+ if (f_status_is_error(status)) {
+ return status;
+ }
+ else if (status == f_true) {
return f_false;
}
f_u_short width = f_macro_utf_character_width_is(character);
if (width == 0) {
+ char ascii = character >> 24;
+
+ if (isspace(ascii)) {
+ return f_true;
+ }
+
return f_false;
}
else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ return f_status_is_error(f_invalid_utf);
}
+ f_bool is_big_endian = f_utf_is_big_endian();
+
if (width == 2) {
- char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+ uint16_t utf = 0;
+ if (is_big_endian) {
+ utf = (uint16_t) (character >> 16);
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
+ }
- if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) {
+ if (!memcmp(&utf, f_utf_space_no_break, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) {
+ if (!memcmp(&utf, f_utf_space_line_feed_reverse, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) {
+ if (!memcmp(&utf, f_utf_space_line_next, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == f_utf_substitute_middle_dot[1]) {
+ if (!memcmp(&utf, f_utf_substitute_middle_dot, width)) {
return f_true;
}
}
if (width == 3) {
- char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+ uint32_t utf = 0;
+ if (is_big_endian) {
+ utf = character;
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
+ }
- if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) {
+ if (!memcmp(&utf, f_utf_space_no_break_narrow, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) {
+ if (!memcmp(&utf, f_utf_space_en, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_en_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_en_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) {
+ if (!memcmp(&utf, f_utf_space_em, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] == f_utf_space_em_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_em_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_three, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_four, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_six, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) {
+ if (!memcmp(&utf, f_utf_space_figure, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) {
+ if (!memcmp(&utf, f_utf_space_punctuation, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) {
+ if (!memcmp(&utf, f_utf_space_thin, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == f_utf_space_hair[2]) {
+ if (!memcmp(&utf, f_utf_space_hair, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) {
+ if (!memcmp(&utf, f_utf_space_separator_line, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) {
+ if (!memcmp(&utf, f_utf_space_separator_paragraph, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) {
+ if (!memcmp(&utf, f_utf_space_ogham, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) {
+ if (!memcmp(&utf, f_utf_space_ideographic, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) {
+ if (!memcmp(&utf, f_utf_space_medium_mathematical, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) {
+ if (!memcmp(&utf, f_utf_substitute_symbol_blank, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) {
+ if (!memcmp(&utf, f_utf_substitute_symbol_space, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) {
+ if (!memcmp(&utf, f_utf_substitute_open_box, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) {
+ if (!memcmp(&utf, f_utf_substitute_open_box_shouldered, width)) {
return f_true;
}
f_u_short width = f_macro_utf_character_width_is(character);
if (width == 0) {
+ // there is no substitute character in ASCII.
return f_false;
}
else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ return f_status_is_error(f_invalid_utf);
}
+ f_bool is_big_endian = f_utf_is_big_endian();
+
if (width == 2) {
- char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+ uint16_t utf = 0;
+ if (is_big_endian) {
+ utf = (uint16_t) (character >> 16);
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
+ }
- if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == f_utf_substitute_middle_dot[1]) {
+ if (!memcmp(&utf, f_utf_substitute_middle_dot, width)) {
return f_true;
}
}
if (width == 3) {
- char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+ uint32_t utf = 0;
+ if (is_big_endian) {
+ utf = character;
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
+ }
- if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) {
+ if (!memcmp(&utf, f_utf_substitute_symbol_blank, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) {
+ if (!memcmp(&utf, f_utf_substitute_symbol_space, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) {
+ if (!memcmp(&utf, f_utf_substitute_open_box, width)) {
return f_true;
}
- if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) {
+ if (!memcmp(&utf, f_utf_substitute_open_box_shouldered, width)) {
return f_true;
}
f_u_short width = f_macro_utf_character_width_is(character);
if (width == 0) {
+ char ascii = character >> 24;
+
+ if (isspace(ascii)) {
+ return f_true;
+ }
+
return f_false;
}
else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ return f_status_is_error(f_invalid_utf);
}
- // Do not operate on UTF-8 fragments that are not the first byte of the character.
- if (width == 1) {
- return f_status_set_error(f_incomplete_utf);
- }
+ f_bool is_big_endian = f_utf_is_big_endian();
if (width == 2) {
- char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+ uint16_t utf = 0;
+ if (is_big_endian) {
+ utf = (uint16_t) (character >> 16);
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_2(character) << 8) | f_macro_utf_character_to_char_1(character);
+ }
- if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) {
+ if (!memcmp(&utf, f_utf_space_no_break, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) {
+ if (!memcmp(&utf, f_utf_space_line_feed_reverse, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) {
+ if (!memcmp(&utf, f_utf_space_line_next, width)) {
return f_true;
}
}
if (width == 3) {
- char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+ uint32_t utf = 0;
+ if (is_big_endian) {
+ utf = character;
+ }
+ else {
+ utf = (f_macro_utf_character_to_char_3(character) << 24) | (f_macro_utf_character_to_char_2(character) << 16) | (f_macro_utf_character_to_char_1(character) << 8);
+ }
- if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) {
+ if (!memcmp(&utf, f_utf_space_no_break_narrow, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) {
+ if (!memcmp(&utf, f_utf_space_en, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_en_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_en_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) {
+ if (!memcmp(&utf, f_utf_space_em, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] == f_utf_space_em_quad[2]) {
+ if (!memcmp(&utf, f_utf_space_em_quad, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_three, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_four, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) {
+ if (!memcmp(&utf, f_utf_space_em_per_six, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) {
+ if (!memcmp(&utf, f_utf_space_figure, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) {
+ if (!memcmp(&utf, f_utf_space_punctuation, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) {
+ if (!memcmp(&utf, f_utf_space_thin, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == f_utf_space_hair[2]) {
+ if (!memcmp(&utf, f_utf_space_hair, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) {
+ if (!memcmp(&utf, f_utf_space_separator_line, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) {
+ if (!memcmp(&utf, f_utf_space_separator_paragraph, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) {
+ if (!memcmp(&utf, f_utf_space_ogham, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) {
+ if (!memcmp(&utf, f_utf_space_ideographic, width)) {
return f_true;
}
- if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) {
+ if (!memcmp(&utf, f_utf_space_medium_mathematical, width)) {
return f_true;
}
return f_none;
}
else if (width == 1) {
- return f_status_is_error(f_incomplete_utf);
+ return f_status_is_error(f_invalid_utf);
}
if (width > max_width) {
return f_status_set_error(f_failure);
}
- memset(utf_character, 0, sizeof(f_utf_character));
-
+ *utf_character = 0;
*utf_character |= f_macro_utf_character_to_char_1(character[0]);
if (width < 2) {
}
#endif // _di_f_utf_char_to_character_
+#ifndef _di_f_utf_character_to_char_
+  /**
+   * Convert a UTF-8 character stored in a 4-byte integer into its 1 to 4 byte char sequence.
+   *
+   * The bytes are extracted by value (most significant byte first) using the
+   * f_macro_utf_character_to_char_* macros, so the result does not depend on host endianness.
+   *
+   * @param utf_character
+   *   The character to convert; must not be 0.
+   * @param character
+   *   Destination string.
+   *   When max_width is NULL, *character must be NULL and a buffer of exactly the needed size is allocated.
+   *   When max_width is not NULL, *character must already point to at least *max_width bytes.
+   * @param max_width
+   *   On input, the capacity of *character in bytes (at most 4).
+   *   On success, set to the number of bytes written.
+   *   May be NULL to request allocation, in which case the width is not reported back.
+   *
+   * @return
+   *   f_none on success.
+   *   f_invalid_parameter (with error bit) on bad arguments.
+   *   f_invalid_utf (with error bit) when the first byte is a continuation fragment (10xx xxxx).
+   *   f_failure (with error bit) when the character does not fit in *max_width bytes.
+   *   Errors from f_new_string() are passed through.
+   */
+  f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, f_u_short *max_width) {
+    #ifndef _di_level_0_parameter_checking_
+      if (utf_character == 0) return f_status_set_error(f_invalid_parameter);
+      if (character == 0) return f_status_set_error(f_invalid_parameter);
+      if (max_width == 0 && *character != 0) return f_status_set_error(f_invalid_parameter);
+      if (max_width != 0 && *character == 0) return f_status_set_error(f_invalid_parameter);
+      if (max_width != 0 && *max_width > 4) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_0_parameter_checking_
+
+    f_status status = f_none;
+
+    const f_u_short width = f_macro_utf_character_width_is(utf_character);
+
+    // Do not operate on UTF-8 fragments that are not the first byte of the character.
+    if (width == 1) {
+      return f_status_set_error(f_invalid_utf);
+    }
+
+    // The *_width_is macro reports 0 for ASCII, which still occupies a single byte.
+    const f_u_short total = (width == 0) ? 1 : width;
+
+    if (max_width == 0) {
+      // No capacity was supplied: allocate exactly what is needed and never write through the NULL pointer.
+      f_new_string(status, *character, total);
+
+      if (f_status_is_error(status)) return status;
+    }
+    else {
+      if (total > *max_width) {
+        return f_status_set_error(f_failure);
+      }
+
+      *max_width = total;
+    }
+
+    // Byte order is fixed by the shifts inside the macros, not by the host memory layout.
+    const char bytes[4] = {
+      (char) f_macro_utf_character_to_char_1(utf_character),
+      (char) f_macro_utf_character_to_char_2(utf_character),
+      (char) f_macro_utf_character_to_char_3(utf_character),
+      (char) f_macro_utf_character_to_char_4(utf_character)
+    };
+
+    memcpy(*character, bytes, sizeof(char) * total);
+
+    return f_none;
+  }
+#endif // _di_f_utf_character_to_char_
+
#ifdef __cplusplus
} // extern "C"
#endif
#define _F_utf_h
// libc includes
+#include <ctype.h>
#include <string.h>
// fll includes
#define f_utf_byte_off_3 0xf0 // 1111 0000
#define f_utf_byte_off_4 0xf8 // 1111 1000
- #define f_macro_utf_byte_is(character) (character & f_utf_byte_1)
+ #define f_macro_utf_byte_is(character) ((character) & f_utf_byte_1)
- #define f_macro_utf_byte_is_1(character) ((character & f_utf_byte_off_1) == f_utf_byte_1) // (10xx xxxx & 1100 0000) == 1000 0000
- #define f_macro_utf_byte_is_2(character) ((character & f_utf_byte_off_2) == f_utf_byte_2) // (110x xxxx & 1110 0000) == 1100 0000
- #define f_macro_utf_byte_is_3(character) ((character & f_utf_byte_off_3) == f_utf_byte_3) // (1110 xxxx & 1111 0000) == 1110 0000
- #define f_macro_utf_byte_is_4(character) ((character & f_utf_byte_off_4) == f_utf_byte_4) // (1111 0xxx & 1111 1000) == 1111 0000
+ #define f_macro_utf_byte_is_1(character) (((character) & f_utf_byte_off_1) == f_utf_byte_1) // (10xx xxxx & 1100 0000) == 1000 0000
+ #define f_macro_utf_byte_is_2(character) (((character) & f_utf_byte_off_2) == f_utf_byte_2) // (110x xxxx & 1110 0000) == 1100 0000
+ #define f_macro_utf_byte_is_3(character) (((character) & f_utf_byte_off_3) == f_utf_byte_3) // (1110 xxxx & 1111 0000) == 1110 0000
+ #define f_macro_utf_byte_is_4(character) (((character) & f_utf_byte_off_4) == f_utf_byte_4) // (1111 0xxx & 1111 1000) == 1111 0000
#define f_macro_utf_byte_width(character) ((!f_macro_utf_byte_is(character) || f_macro_utf_byte_is_1(character)) ? 1 : (f_macro_utf_byte_is_2(character) ? 2 : (f_macro_utf_byte_is_3(character) ? 3 : 4)))
#define f_macro_utf_byte_width_is(character) (f_macro_utf_byte_is(character) ? (f_macro_utf_byte_is_1(character) ? 1 : (f_macro_utf_byte_is_2(character) ? 2 : (f_macro_utf_byte_is_3(character) ? 3 : 4))) : 0)
*
* This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
*
+ * This "character" type is stored as a big-endian 4-byte integer (32-bits).
+ * A helper function, f_utf_is_big_endian(), is provided to detect system endianness so that character arrays (char []) can be correctly processed.
+ *
* The byte structure is intended to be read left to right.
*
* The f_macro_utf_character_mask_byte_* are used to get the entire character set for a given width.
*
* The f_macro_utf_character_width is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width.
* The f_macro_utf_character_width_is is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width_is.
+ *
+ * @see f_utf_is_big_endian()
*/
#ifndef _di_f_utf_character_
typedef uint32_t f_utf_character;
#define f_utf_character_mask_char_3 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
#define f_utf_character_mask_char_4 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
- #define f_macro_utf_character_to_char_1(character) ((f_utf_character_mask_char_1 & character) >> 24) // grab first byte.
- #define f_macro_utf_character_to_char_2(character) ((f_utf_character_mask_char_2 & character) >> 16) // grab second byte.
- #define f_macro_utf_character_to_char_3(character) ((f_utf_character_mask_char_3 & character) >> 8) // grab third byte.
- #define f_macro_utf_character_to_char_4(character) (f_utf_character_mask_char_4 & character) // grab fourth byte.
+ #define f_macro_utf_character_to_char_1(character) (((character) & f_utf_character_mask_char_1) >> 24) // grab first byte.
+ #define f_macro_utf_character_to_char_2(character) (((character) & f_utf_character_mask_char_2) >> 16) // grab second byte.
+ #define f_macro_utf_character_to_char_3(character) (((character) & f_utf_character_mask_char_3) >> 8) // grab third byte.
+ #define f_macro_utf_character_to_char_4(character) ((character) & f_utf_character_mask_char_4) // grab fourth byte.
- #define f_macro_utf_character_from_char_1(character) (character << 24) // shift the first byte.
- #define f_macro_utf_character_from_char_2(character) (character << 16) // shift the second byte.
- #define f_macro_utf_character_from_char_3(character) (character << 8) // shift the third byte.
- #define f_macro_utf_character_from_char_4(character) (character) // shift the fourth byte.
+ #define f_macro_utf_character_from_char_1(character) ((character) << 24) // shift the first byte.
+ #define f_macro_utf_character_from_char_2(character) ((character) << 16) // shift the second byte.
+ #define f_macro_utf_character_from_char_3(character) ((character) << 8) // shift the third byte.
+ #define f_macro_utf_character_from_char_4(character) ((character)) // shift the fourth byte.
#define f_macro_utf_character_width(character) (f_macro_utf_byte_width(f_macro_utf_character_to_char_1(character)))
#define f_macro_utf_character_width_is(character) (f_macro_utf_byte_width_is(f_macro_utf_character_to_char_1(character)))
#endif // _di_f_utf_character_
+#ifndef _di_f_utf_character_have_eol_
+ #define f_utf_character_eol 0x0a000000 // 0000 1010, 0000 0000, 0000 0000, 0000 0000 (end of line: 0x0a held in the first, most significant, byte).
+#endif // _di_f_utf_character_have_eol_
+
+#ifndef _di_f_utf_character_have_eos_
+ #define f_utf_character_eos 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 (end of string: all four bytes are zero).
+#endif // _di_f_utf_character_have_eos_
+
+#ifndef _di_f_utf_character_have_placeholder_
+ #define f_utf_character_placeholder 0x00000000 // 0000 0000, 0000 0000, 0000 0000, 0000 0000 (placeholder: same value as f_utf_character_eos).
+#endif // _di_f_utf_character_have_placeholder_
+
+/**
+ * Provide a string of UTF-8 characters, each stored 4-bytes wide (one f_utf_character per position).
+ *
+ * f_utf_string_max_size is defined as f_signed_long_size.
+ * f_utf_string_initialize is defined as f_eos.
+ *
+ * Each new/delete/destroy/resize/adjust macro assigns the result of the matching f_*() memory call to status.
+ *
+ * NOTE(review): these macros pass sizeof(f_utf_string), the size of the pointer type, as the element size
+ * rather than sizeof(f_utf_character) - confirm this is intended.
+ */
+#ifndef _di_f_utf_string_
+ typedef f_utf_character *f_utf_string;
+
+ #define f_utf_string_max_size f_signed_long_size
+ #define f_utf_string_initialize f_eos
+
+ #define f_new_utf_char(status, string, length) status = f_new_array((void **) & string, sizeof(f_utf_string), length)
+ #define f_delete_utf_char(status, string, size) status = f_delete((void **) & string, sizeof(f_utf_string), size)
+ #define f_destroy_utf_char(status, string, size) status = f_destroy((void **) & string, sizeof(f_utf_string), size)
+
+ #define f_resize_utf_char(status, string, old_length, new_length) \
+ status = f_resize((void **) & string, sizeof(f_utf_string), old_length, new_length)
+
+ #define f_adjust_utf_char(status, string, old_length, new_length) \
+ status = f_adjust((void **) & string, sizeof(f_utf_string), old_length, new_length)
+#endif // _di_f_utf_string_
+
+/**
+ * Provide a type specifically for UTF-8 string lengths and positions.
+ *
+ * Each new/delete/destroy/resize/adjust macro assigns the result of the matching f_*() memory call to status,
+ * using sizeof(f_utf_string_length) as the element size.
+ * All of them pass the address of the array cast to (void **), the same as the other UTF-8 string macros.
+ */
+#ifndef _di_f_utf_string_length_
+ typedef f_s_long f_utf_string_length;
+
+ #define f_new_utf_string_length(status, string, length) status = f_new_array((void **) & string, sizeof(f_utf_string_length), length)
+ #define f_delete_utf_string_length(status, string, length) status = f_delete((void **) & string, sizeof(f_utf_string_length), length)
+ #define f_destroy_utf_string_length(status, string, size) status = f_destroy((void **) & string, sizeof(f_utf_string_length), size)
+
+ #define f_resize_utf_string_length(status, length, old_length, new_length) \
+ status = f_resize((void **) & length, sizeof(f_utf_string_length), old_length, new_length)
+
+ #define f_adjust_utf_string_length(status, length, old_length, new_length) \
+ status = f_adjust((void **) & length, sizeof(f_utf_string_length), old_length, new_length)
+#endif // _di_f_utf_string_length_
+
+/**
+ * an array of UTF-8 string lengths.
+ *
+ * array: the allocated f_utf_string_length elements.
+ * size: total amount of allocated space.
+ * used: total number of allocated spaces used.
+ *
+ * Every macro delegates to the matching f_*_structure() macro with f_utf_string_length as the element type.
+ * f_new_utf_string_lengths() takes the number of elements to allocate and forwards it to f_new_structure(),
+ * in the same way f_new_utf_string_locations() does.
+ */
+#ifndef _di_f_utf_string_lengths_
+ typedef struct {
+ f_utf_string_length *array;
+ f_array_length size;
+ f_array_length used;
+ } f_utf_string_lengths;
+
+ #define f_utf_string_lengths_initialize { 0, 0, 0 }
+
+ #define f_new_utf_string_lengths(status, lengths, length) \
+ f_new_structure(status, lengths, f_utf_string_length, length)
+
+ #define f_delete_utf_string_lengths(status, lengths) \
+ f_delete_structure(status, lengths, f_utf_string_length)
+
+ #define f_destroy_utf_string_lengths(status, lengths) \
+ f_destroy_structure(status, lengths, f_utf_string_length)
+
+ #define f_resize_utf_string_lengths(status, lengths, new_length) \
+ f_resize_structure(status, lengths, f_utf_string_length, new_length)
+
+ #define f_adjust_utf_string_lengths(status, lengths, new_length) \
+ f_adjust_structure(status, lengths, f_utf_string_length, new_length)
+#endif // _di_f_utf_string_lengths_
+
+/**
+ * designates a start and stop position that represents a sub-string inside of some parent string.
+ * use this to avoid resizing, restructuring, and reallocating the parent string to separate the sub-string.
+ *
+ * start: the first position of the sub-string.
+ * stop: the last position of the sub-string.
+ *
+ * f_utf_string_location_initialize sets start to 1 and stop to 0, so start is greater than stop
+ * (presumably marking a location that has not been set yet - confirm against callers).
+ *
+ * Each new/delete/destroy/resize/adjust macro assigns the result of the matching f_*() memory call to status,
+ * using sizeof(f_utf_string_location) as the element size.
+ */
+#ifndef _di_f_utf_string_location_
+ typedef struct {
+ f_utf_string_length start;
+ f_utf_string_length stop;
+ } f_utf_string_location;
+
+ #define f_utf_string_location_initialize { 1, 0 }
+
+ #define f_new_utf_string_location(status, utf_string_location, length) status = f_new_array((void **) & utf_string_location, sizeof(f_utf_string_location), length)
+ #define f_delete_utf_string_location(status, utf_string_location, size) status = f_delete((void **) & utf_string_location, sizeof(f_utf_string_location), size)
+ #define f_destroy_utf_string_location(status, utf_string_location, size) status = f_destroy((void **) & utf_string_location, sizeof(f_utf_string_location), size)
+
+ #define f_resize_utf_string_location(status, utf_string_location, old_length, new_length) \
+ status = f_resize((void **) & utf_string_location, sizeof(f_utf_string_location), old_length, new_length)
+
+ #define f_adjust_utf_string_location(status, utf_string_location, old_length, new_length) \
+ status = f_adjust((void **) & utf_string_location, sizeof(f_utf_string_location), old_length, new_length)
+#endif // _di_f_utf_string_location_
+
+/**
+ * an array of string locations.
+ *
+ * array: the allocated f_utf_string_location elements.
+ * size: total amount of allocated space.
+ * used: total number of allocated spaces used.
+ *
+ * f_clear_utf_string_locations() delegates to f_clear_structure().
+ * The new/delete/destroy/resize/adjust macros delegate to the matching f_*_structure() macro
+ * with f_utf_string_location as the element type.
+ */
+#ifndef _di_f_utf_string_locations_
+ typedef struct {
+ f_utf_string_location *array;
+ f_array_length size;
+ f_array_length used;
+ } f_utf_string_locations;
+
+ #define f_utf_string_locations_initialize {0, 0, 0}
+
+ #define f_clear_utf_string_locations(locations) \
+ f_clear_structure(locations)
+
+ #define f_new_utf_string_locations(status, locations, length) \
+ f_new_structure(status, locations, f_utf_string_location, length)
+
+ #define f_delete_utf_string_locations(status, locations) \
+ f_delete_structure(status, locations, f_utf_string_location)
+
+ #define f_destroy_utf_string_locations(status, locations) \
+ f_destroy_structure(status, locations, f_utf_string_location)
+
+ #define f_resize_utf_string_locations(status, locations, new_length) \
+ f_resize_structure(status, locations, f_utf_string_location, new_length)
+
+ #define f_adjust_utf_string_locations(status, locations, new_length) \
+ f_adjust_structure(status, locations, f_utf_string_location, new_length)
+#endif // _di_f_utf_string_locations_
+
+/**
+ * a string that contains a size attribute to handle dynamic allocations and deallocations.
+ * save the string size along with the string, so that strlen(..) commands can be avoided as much as possible.
+ *
+ * string: the allocated UTF-8 characters.
+ * size: total amount of allocated space.
+ * used: total number of allocated spaces used.
+ *
+ * f_clear_utf_string_dynamic() zeroes all three members without releasing anything.
+ * f_new_utf_string_dynamic() clears the structure first and then allocates new_length elements.
+ * The delete/destroy macros reset size and used to 0 only when the memory call returns f_none.
+ * The resize/adjust macros store new_length in size on f_none and lower used to new_length when used exceeds it.
+ *
+ * These macros expand to several statements, so they must not be used as the unbraced body of an if/else/loop.
+ *
+ * NOTE(review): the element size passed to the memory calls is sizeof(f_utf_string), the size of the pointer type,
+ * rather than sizeof(f_utf_character) - confirm this is intended.
+ */
+#ifndef _di_f_utf_string_dynamic_
+ typedef struct {
+ f_utf_string string;
+ f_utf_string_length size;
+ f_utf_string_length used;
+ } f_utf_string_dynamic;
+
+ #define f_utf_string_dynamic_initialize { f_utf_string_initialize, 0, 0 }
+
+ #define f_clear_utf_string_dynamic(dynamic) \
+ dynamic.string = 0; \
+ dynamic.size = 0; \
+ dynamic.used = 0;
+
+ #define f_new_utf_string_dynamic(status, dynamic, new_length) \
+ f_clear_utf_string_dynamic(dynamic) \
+ status = f_new_array((void **) & dynamic.string, sizeof(f_utf_string), new_length); \
+ if (status == f_none) { \
+ dynamic.size = new_length; \
+ dynamic.used = 0; \
+ }
+
+ #define f_delete_utf_string_dynamic(status, dynamic) \
+ status = f_delete((void **) & dynamic.string, sizeof(f_utf_string), dynamic.size); \
+ if (status == f_none) { \
+ dynamic.size = 0; \
+ dynamic.used = 0; \
+ }
+
+ #define f_destroy_utf_string_dynamic(status, dynamic) \
+ status = f_destroy((void **) & dynamic.string, sizeof(f_utf_string), dynamic.size); \
+ if (status == f_none) { \
+ dynamic.size = 0; \
+ dynamic.used = 0; \
+ }
+
+ #define f_resize_utf_string_dynamic(status, dynamic, new_length) \
+ status = f_resize((void **) & dynamic.string, sizeof(f_utf_string), dynamic.size, new_length); \
+ if (status == f_none) { \
+ dynamic.size = new_length; \
+ if (dynamic.used > dynamic.size) dynamic.used = new_length; \
+ }
+
+ #define f_adjust_utf_string_dynamic(status, dynamic, new_length) \
+ status = f_adjust((void **) & dynamic.string, sizeof(f_utf_string), dynamic.size, new_length); \
+ if (status == f_none) { \
+ dynamic.size = new_length; \
+ if (dynamic.used > dynamic.size) dynamic.used = new_length; \
+ }
+#endif // _di_f_utf_string_dynamic_
+
+/**
+ * an array of dynamic utf_strings.
+ *
+ * array: the allocated f_utf_string_dynamic elements.
+ * size: total amount of allocated space.
+ * used: total number of allocated spaces used.
+ *
+ * The delete/resize macros release elements with f_delete_utf_string_dynamic().
+ * The destroy/adjust macros release elements with f_destroy_utf_string_dynamic().
+ * When shrinking, only the elements from index new_length up to size - 1 are released; the elements that remain are untouched.
+ * When growing, the newly added elements are zeroed with memset().
+ *
+ * These macros expand to several statements, so they must not be used as the unbraced body of an if/else/loop.
+ */
+#ifndef _di_f_utf_string_dynamics_
+ typedef struct {
+ f_utf_string_dynamic *array;
+ f_utf_string_length size;
+ f_utf_string_length used;
+ } f_utf_string_dynamics;
+
+ #define f_utf_string_dynamics_initialize { 0, 0, 0 }
+
+ #define f_clear_utf_string_dynamics(dynamics) \
+ dynamics.array = 0; \
+ dynamics.size = 0; \
+ dynamics.used = 0;
+
+ #define f_new_utf_string_dynamics(status, dynamics, length) \
+ dynamics.array = 0; \
+ dynamics.size = 0; \
+ dynamics.used = 0; \
+ status = f_new_array((void **) & dynamics.array, sizeof(f_utf_string_dynamic), length); \
+ if (status == f_none) { \
+ dynamics.size = length; \
+ dynamics.used = 0; \
+ }
+
+ // releases every element, last to first, then the array itself.
+ #define f_delete_utf_string_dynamics(status, dynamics) \
+ status = f_none; \
+ while (dynamics.size > 0) { \
+ --dynamics.size; \
+ f_delete_utf_string_dynamic(status, dynamics.array[dynamics.size]); \
+ if (status != f_none) break; \
+ } \
+ if (status == f_none) status = f_delete((void **) & dynamics.array, sizeof(f_utf_string_dynamic), dynamics.size); \
+ if (status == f_none) dynamics.used = 0;
+
+ // same as the delete macro, but uses the destroy variants.
+ #define f_destroy_utf_string_dynamics(status, dynamics) \
+ status = f_none; \
+ while (dynamics.size > 0) { \
+ --dynamics.size; \
+ f_destroy_utf_string_dynamic(status, dynamics.array[dynamics.size]); \
+ if (status != f_none) break; \
+ } \
+ if (status == f_none) status = f_destroy((void **) & dynamics.array, sizeof(f_utf_string_dynamic), dynamics.size); \
+ if (status == f_none) dynamics.used = 0;
+
+ // the elements being dropped are those at index new_length and above, so releasing starts at new_length.
+ #define f_resize_utf_string_dynamics(status, dynamics, new_length) \
+ status = f_none; \
+ if (new_length < dynamics.size) { \
+ f_utf_string_length i = new_length; \
+ for (; i < dynamics.size; ++i) { \
+ f_delete_utf_string_dynamic(status, dynamics.array[i]); \
+ if (status != f_none) break; \
+ } \
+ } \
+ if (status == f_none) status = f_resize((void **) & dynamics.array, sizeof(f_utf_string_dynamic), dynamics.size, new_length); \
+ if (status == f_none) { \
+ if (new_length > dynamics.size) { \
+ f_utf_string_length i = dynamics.size; \
+ for (; i < new_length; ++i) { \
+ memset(&dynamics.array[i], 0, sizeof(f_utf_string_dynamic)); \
+ } \
+ } \
+ dynamics.size = new_length; \
+ if (dynamics.used > dynamics.size) dynamics.used = new_length; \
+ }
+
+ // same as the resize macro, but uses the destroy/adjust variants.
+ #define f_adjust_utf_string_dynamics(status, dynamics, new_length) \
+ status = f_none; \
+ if (new_length < dynamics.size) { \
+ f_utf_string_length i = new_length; \
+ for (; i < dynamics.size; ++i) { \
+ f_destroy_utf_string_dynamic(status, dynamics.array[i]); \
+ if (status != f_none) break; \
+ } \
+ } \
+ if (status == f_none) status = f_adjust((void **) & dynamics.array, sizeof(f_utf_string_dynamic), dynamics.size, new_length); \
+ if (status == f_none) { \
+ if (new_length > dynamics.size) { \
+ f_utf_string_length i = dynamics.size; \
+ for (; i < new_length; ++i) { \
+ memset(&dynamics.array[i], 0, sizeof(f_utf_string_dynamic)); \
+ } \
+ } \
+ dynamics.size = new_length; \
+ if (dynamics.used > dynamics.size) dynamics.used = new_length; \
+ }
+#endif // _di_f_utf_string_dynamics_
+
/**
* Define the UTF-8 general whitespace codes.
*
#endif // _di_f_utf_substitute_
/**
+ * Helper function for UTF-8 processing code to determine endianess of the system.
+ *
+ *
+ * @return
+ * f_true if the system is big-endian.
+ * f_false if the system is little-endian.
+ */
+#ifndef _di_f_utf_is_big_endian_
+ // declared with (void) so that this is a real prototype: in C an empty parameter list leaves the arguments unchecked.
+ extern f_return_status f_utf_is_big_endian(void);
+#endif // _di_f_utf_is_big_endian_
+
+/**
* Check to see if the entire byte block of the character is a UTF-8 character.
*
* @param character
* @return
* f_true if a UTF-8 character.
* f_false if not a UTF-8 character.
- * f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
+ * f_invalid_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*/
#ifndef _di_f_utf_is_
extern f_return_status f_utf_char_to_character(const f_string character, const f_u_short max_width, f_utf_character *utf_character);
#endif // _di_f_utf_char_to_character_
+/**
+ * Convert a specialized f_utf_character type to a char, stored as a string (character buffer).
+ *
+ * This will also convert ASCII characters stored in the utf_character array.
+ *
+ * @param utf_character
+ * The UTF-8 character to convert from.
+ * @param character
+ * A char representation of the UTF-8 character, stored as a string of width bytes.
+ * If max_width is 0, then this should not be allocated (set the pointer address to 0).
+ * @param max_width
+ * The number of bytes the generated character represents.
+ * If this is set to 0, then the character will be allocated and this will be set to the width of the utf_character.
+ * If this is set to some value greater than 0 (up to 4), then this represents the size of the character array (no allocations are performed).
+ * If this is greater than 0, and the utf_character width is larger than this size, then an error is returned.
+ *
+ * @return
+ * f_none if conversion was successful.
+ * f_failure (with error bit) if width is not long enough to convert.
+ * f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
+ * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_allocation_error (with error bit) on memory allocation error.
+ */
+#ifndef _di_f_utf_character_to_char_
+ extern f_return_status f_utf_character_to_char(const f_utf_character utf_character, f_string *character, f_u_short *max_width);
+#endif // _di_f_utf_character_to_char_
+
#ifdef __cplusplus
} // extern "C"
#endif
if (input.start >= buffer.used) return f_status_set_error(f_invalid_parameter);
#endif // _di_level_1_parameter_checking_
- f_u_short utf_width = f_macro_utf_byte_width_is(buffer.string[input.start]);
-
- if (utf_width == 0) {
- if (isgraph(buffer.string[input.start])) {
- return f_true;
- }
-
- return f_false;
- }
-
f_string_length max_width = (input.stop - input.start) + 1;
if (max_width > buffer.used - input.start) {
max_width = buffer.used - input.start;
}
- f_status status = f_utf_is_space(buffer.string + input.start, max_width);
-
- if (f_status_is_error(status)) {
- return status;
- }
-
- if (status == f_true) {
- return f_false;
- }
-
- return f_true;
+ return f_utf_is_graph(buffer.string + input.start, max_width);
}
#endif // _di_fl_fss_is_graph_
if (input.start >= buffer.used) return f_status_set_error(f_invalid_parameter);
#endif // _di_level_1_parameter_checking_
- f_u_short utf_width = f_macro_utf_byte_width_is(buffer.string[input.start]);
-
- if (utf_width == 0) {
- if (isspace(buffer.string[input.start])) {
- return f_true;
- }
-
- return f_false;
- }
-
f_string_length max_width = (input.stop - input.start) + 1;
if (max_width > buffer.used - input.start) {
max_width = buffer.used - input.start;
}
- f_status status = f_utf_is_space(buffer.string + input.start, max_width);
-
- if (f_status_is_error(status)) {
- return status;
- }
-
- if (status == f_true) {
- return f_true;
- }
-
- return f_false;
+ return f_utf_is_space(buffer.string + input.start, max_width);
}
#endif // _di_fl_fss_is_space_
+/**
+ * Advance input->start past f_eos bytes and non-graph characters, stopping early at an end of line.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param input
+ *   The start and stop (inclusive) positions inside the buffer; start is updated.
+ *
+ * @return
+ *   f_none once f_utf_is_graph() no longer returns f_false for the current character.
+ *   f_none_on_eol when an f_eol is reached.
+ *   f_none_on_eos when start reaches the end of the used buffer.
+ *   f_none_on_stop when start passes the stop position.
+ *   f_incomplete_utf (with error bit) when positioned on a UTF-8 fragment that is not the first byte of a character.
+ *   f_incomplete_utf_on_eos (with error bit) when a multi-byte character extends past the used buffer.
+ *   f_incomplete_utf_on_stop (with error bit) when a multi-byte character extends past the stop position.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   Any error returned by f_utf_is_graph() is passed through.
+ */
f_return_status fl_fss_skip_past_whitespace(const f_dynamic_string buffer, f_string_location *input) {
#ifndef _di_level_1_parameter_checking_
if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter);
+ if (input == 0) return f_status_set_error(f_invalid_parameter);
if (input->start < 0) return f_status_set_error(f_invalid_parameter);
if (input->stop < input->start) return f_status_set_error(f_invalid_parameter);
if (input->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
#endif // _di_level_1_parameter_checking_
f_status status = f_none;
- f_u_short max_width = 0;
+ f_u_short width = 0;
+
+ f_string_length max_width = (input->stop - input->start) + 1;
+
+ if (max_width > buffer.used - input->start) {
+ max_width = buffer.used - input->start;
+ }
- while (input->start < buffer.used && input->start > input->stop) {
- if (isgraph(buffer.string[input->start])) break;
+ while (buffer.string[input->start] == f_eos || (status = f_utf_is_graph(buffer.string + input->start, max_width)) == f_false) {
+ if (f_status_is_error(status)) {
+ return status;
+ }
- if (buffer.string[input->start] == f_eol) break;
+ if (buffer.string[input->start] == f_eol) return f_none_on_eol;
- if (buffer.string[input->start] != f_fss_delimit_placeholder) {
- max_width = (input->stop - input->start) + 1;
+ width = f_macro_utf_byte_width_is(buffer.string[input->start]);
- if (f_utf_is_space(buffer.string +input->start, max_width) != f_true) {
- if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) {
- break;
- }
- }
+ if (width == 0) {
+ width = 1;
}
+ // Do not operate on UTF-8 fragments that are not the first byte of the character.
+ else if (width == 1) {
+ return f_status_set_error(f_incomplete_utf);
+ }
+ else {
+ // The character occupies positions start through start + width - 1 (inclusive).
+ // It is only incomplete when that last position lies beyond the used buffer or beyond the inclusive stop.
+ if (input->start + width > buffer.used) return f_status_set_error(f_incomplete_utf_on_eos);
+ if (input->start + width - 1 > input->stop) return f_status_set_error(f_incomplete_utf_on_stop);
+ }
+
+ input->start += width;
+
+ if (input->start >= buffer.used) return f_none_on_eos;
+ if (input->start > input->stop) return f_none_on_stop;
- input->start++;
+ max_width = (input->stop - input->start) + 1;
+
+ if (max_width > buffer.used - input->start) {
+ max_width = buffer.used - input->start;
+ }
} // while
+ if (f_status_is_error(status)) {
+ return status;
+ }
+
return f_none;
}
#endif // _di_fl_fss_skip_past_whitespace_
+/**
+ * Advance input->start past f_eos bytes and non-graph characters, including end of line characters.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param input
+ *   The start and stop (inclusive) positions inside the buffer; start is updated.
+ *
+ * @return
+ *   f_none once f_utf_is_graph() no longer returns f_false for the current character.
+ *   f_none_on_eos when start reaches the end of the used buffer.
+ *   f_none_on_stop when start passes the stop position.
+ *   f_incomplete_utf (with error bit) when positioned on a UTF-8 fragment that is not the first byte of a character.
+ *   f_incomplete_utf_on_eos (with error bit) when a multi-byte character extends past the used buffer.
+ *   f_incomplete_utf_on_stop (with error bit) when a multi-byte character extends past the stop position.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   Any error returned by f_utf_is_graph() is passed through.
+ */
f_return_status fl_fss_skip_past_all_whitespace(const f_dynamic_string buffer, f_string_location *input) {
#ifndef _di_level_1_parameter_checking_
if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter);
+ if (input == 0) return f_status_set_error(f_invalid_parameter);
if (input->start < 0) return f_status_set_error(f_invalid_parameter);
if (input->stop < input->start) return f_status_set_error(f_invalid_parameter);
if (input->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
#endif // _di_level_1_parameter_checking_
f_status status = f_none;
- f_u_short max_width = 0;
+ f_u_short width = 0;
- while (input->start < buffer.used && input->start > input->stop) {
- if (isgraph(buffer.string[input->start])) break;
+ f_string_length max_width = (input->stop - input->start) + 1;
- if (buffer.string[input->start] != f_fss_delimit_placeholder) {
- max_width = (input->stop - input->start) + 1;
+ if (max_width > buffer.used - input->start) {
+ max_width = buffer.used - input->start;
+ }
- if (f_utf_is_space(buffer.string + input->start, max_width) != f_true) {
- if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) {
- break;
- }
- }
+ while (buffer.string[input->start] == f_eos || (status = f_utf_is_graph(buffer.string + input->start, max_width)) == f_false) {
+ if (f_status_is_error(status)) {
+ return status;
+ }
+
+ width = f_macro_utf_byte_width_is(buffer.string[input->start]);
+
+ if (width == 0) {
+ width = 1;
}
+ // Do not operate on UTF-8 fragments that are not the first byte of the character.
+ else if (width == 1) {
+ return f_status_set_error(f_incomplete_utf);
+ }
+ else {
+ // The character occupies positions start through start + width - 1 (inclusive).
+ // It is only incomplete when that last position lies beyond the used buffer or beyond the inclusive stop.
+ if (input->start + width > buffer.used) return f_status_set_error(f_incomplete_utf_on_eos);
+ if (input->start + width - 1 > input->stop) return f_status_set_error(f_incomplete_utf_on_stop);
+ }
+
+ input->start += width;
+
+ if (input->start >= buffer.used) return f_none_on_eos;
+ if (input->start > input->stop) return f_none_on_stop;
- input->start++;
+ max_width = (input->stop - input->start) + 1;
+
+ if (max_width > buffer.used - input->start) {
+ max_width = buffer.used - input->start;
+ }
} // while
+ if (f_status_is_error(status)) {
+ return status;
+ }
+
return f_none;
}
#endif // _di_fl_fss_skip_past_all_whitespace_
build_libraries -lc
build_libraries_fll -lf_conversion -lf_file -lf_memory -lf_utf
build_sources_library fss.c fss_basic.c fss_basic_list.c fss_extended.c
-build_sources_program
+build_sources_program
build_sources_headers fss.h fss_basic.h fss_basic_list.h fss_status.h fss_extended.h fss_macro.h
build_sources_bash
build_sources_settings
max_width = buffer.used - location->start;
}
- while (buffer.string[location->start] == placeholder || (!isgraph(buffer.string[location->start]) && (status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false)) {
+ while (buffer.string[location->start] == placeholder || (status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false) {
+ if (f_status_is_error(status)) {
+ return status;
+ }
+
if (buffer.string[location->start] == f_eol) return f_none_on_eol;
width = f_macro_utf_byte_width_is(buffer.string[location->start]);
max_width = buffer.used - location->start;
}
- while (buffer.string[location->start] == placeholder || (isgraph(buffer.string[location->start]) && (status = f_utf_is_space(buffer.string + location->start, max_width)) == f_false)) {
+ while (buffer.string[location->start] == placeholder || (status = f_utf_is_space(buffer.string + location->start, max_width)) == f_false) {
+ if (f_status_is_error(status)) {
+ return status;
+ }
+
if (buffer.string[location->start] == f_eol) return f_none_on_eol;
width = f_macro_utf_byte_width_is(buffer.string[location->start]);
}
#endif // _di_fl_seek_line_to_
-#ifndef _di_fl_seek_line_to_character_
- f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
+#ifndef _di_fl_seek_line_to_utf_character_
+ f_return_status fl_seek_line_to_utf_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
#ifndef _di_level_1_parameter_checking_
if (location == 0) return f_status_set_error(f_invalid_parameter);
if (location->start < 0) return f_status_set_error(f_invalid_parameter);
return f_none_on_eos;
}
-#endif // _di_fl_seek_line_to_character_
+#endif // _di_fl_seek_line_to_utf_character_
#ifndef _di_fl_seek_to_
f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this) {
}
#endif // _di_fl_seek_to_
-#ifndef _di_fl_seek_to_character_
- f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
+#ifndef _di_fl_seek_to_utf_character_
+ f_return_status fl_seek_to_utf_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
#ifndef _di_level_1_parameter_checking_
if (location == 0) return f_status_set_error(f_invalid_parameter);
if (location->start < 0) return f_status_set_error(f_invalid_parameter);
return f_none_on_eos;
}
-#endif // _di_fl_seek_to_character_
+#endif // _di_fl_seek_to_utf_character_
#ifndef _di_fl_compare_strings_
f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2) {
* f_none_on_stop on success, but stopped stop location.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_seek_line_to_character()
+ * @see fl_seek_line_to_utf_character()
*/
#ifndef _di_fl_seek_line_to_
extern f_return_status fl_seek_line_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this);
* f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_seek_line_to()
+ * @see fl_seek_line_to()
*/
-#ifndef _di_fl_seek_line_to_character_
- extern f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
-#endif // _di_fl_seek_line_to_character_
+#ifndef _di_fl_seek_line_to_utf_character_
+ extern f_return_status fl_seek_line_to_utf_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_seek_line_to_utf_character_
/**
* Seek the buffer location forward until the character (1-byte wide) is reached.
* f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_seek_to_character()
+ * @see fl_seek_to_utf_character()
*/
#ifndef _di_fl_seek_to_
extern f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this);
#endif // _di_fl_seek_to_
/**
- * Seek the buffer location forward until the character (up to 4-byte wide) is reached.
+ * Seek the buffer location forward until the UTF-8 character (up to 4-byte wide) is reached.
*
* @param buffer
* The buffer to traverse.
* @return
* f_none on success.
* f_none_on_eos on success, but stopped at end of buffer.
+ * f_none_on_stop on success, but stopped at the stop location.
* f_invalid_utf (with error bit) if character is an invalid UTF-8 character.
* f_incomplete_utf (with error bit) if character is an incomplete UTF-8 fragment.
* f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
* f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_seek_to()
+ * @see fl_seek_to()
*/
-#ifndef _di_fl_seek_to_character_
- extern f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
-#endif // _di_fl_seek_to_character_
+#ifndef _di_fl_seek_to_utf_character_
+ extern f_return_status fl_seek_to_utf_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_seek_to_utf_character_
/**
* Compare two strings, similar to strncmp().
* f_not_equal_to when both strings do not equal.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_compare_dynamic_strings()
- * @see: fl_compare_dynamic_strings_partial()
+ * @see fl_compare_dynamic_strings()
+ * @see fl_compare_dynamic_strings_partial()
*/
#ifndef _di_fl_compare_strings_
extern f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2);
* f_not_equal_to when both strings do not equal.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_compare_strings()
- * @see: fl_compare_dynamic_strings_partial()
+ * @see fl_compare_strings()
+ * @see fl_compare_dynamic_strings_partial()
*/
#ifndef _di_fl_compare_dynamic_strings_
extern f_return_status fl_compare_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2);
* f_not_equal_to when both strings do not equal.
* f_invalid_parameter (with error bit) if a parameter is invalid.
*
- * @see: fl_compare_strings()
- * @see: fl_compare_dynamic_strings()
+ * @see fl_compare_strings()
+ * @see fl_compare_dynamic_strings()
*/
#ifndef _di_fl_compare_partial_dynamic_strings_
extern f_return_status fl_compare_dynamic_strings_partial(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2);
--- /dev/null
+#include <level_1/utf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_fl_rip_utf_string_
+  /**
+   * Copy the inclusive range location.start..location.stop of buffer into result.
+   *
+   * The result is allocated when it holds no string yet and is resized otherwise.
+   * On success result->used is set to the number of characters copied.
+   *
+   * @return
+   *   f_none on success.
+   *   f_no_data if the computed size is not positive.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid (including a NULL result or a stop location at or beyond buffer.used).
+   *   Any error status set by the allocation or resize macro is returned unchanged.
+   */
+  f_return_status fl_rip_utf_string(const f_utf_string_dynamic buffer, const f_utf_string_location location, f_utf_string_dynamic *result) {
+    #ifndef _di_level_1_parameter_checking_
+      if (result == 0) return f_status_set_error(f_invalid_parameter);
+      if (location.start < 0) return f_status_set_error(f_invalid_parameter);
+      if (location.stop < location.start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter);
+      if (location.start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+
+      // the copy below reads up to and including location.stop, so it must lie inside the used part of the buffer.
+      if (location.stop >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    // the start and stop point are inclusive locations, and therefore stop - start is actually 1 too few locations
+    f_utf_string_length size = (location.stop - location.start) + 1;
+
+    if (size > 0) {
+      f_status status = f_none;
+
+      // decide on the state of the destination string; the result pointer itself must never be dereferenced when NULL.
+      if (result->string == 0) {
+        f_new_utf_string_dynamic(status, (*result), size);
+      }
+      else {
+        f_resize_utf_string_dynamic(status, (*result), size);
+      }
+
+      if (f_status_is_error(status)) {
+        return status;
+      }
+
+      memcpy(result->string, buffer.string + location.start, sizeof(f_utf_character) * size);
+      result->used = size;
+
+      return f_none;
+    }
+
+    return f_no_data;
+  }
+#endif // _di_fl_rip_utf_string_
+
+#ifndef _di_fl_utf_seek_line_until_graph_
+  /**
+   * Advance location->start past placeholders and past characters for which f_utf_is_graph_character() returns f_false.
+   *
+   * @return
+   *   f_none when the seek stopped on a character that is neither the placeholder nor reported as f_false.
+   *   f_none_on_eol when a skipped character is f_utf_character_eol.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   *   Any error status from f_utf_is_graph_character() is returned unchanged.
+   */
+  f_return_status fl_utf_seek_line_until_graph(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character placeholder) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0 || location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0 || location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    f_status status = f_none;
+
+    for (;;) {
+      const f_utf_character current = buffer.string[location->start];
+
+      // a placeholder is skipped without being classified, so status keeps its previous value.
+      if (current != placeholder) {
+        status = f_utf_is_graph_character(current);
+
+        if (status != f_false) break;
+      }
+
+      if (f_status_is_error(status)) {
+        return status;
+      }
+
+      if (current == f_utf_character_eol) {
+        return f_none_on_eol;
+      }
+
+      if (f_macro_utf_character_width_is(current) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) {
+        return f_none_on_eos;
+      }
+
+      if (location->start > location->stop) {
+        return f_none_on_stop;
+      }
+    } // for
+
+    // the classification itself may have failed, in which case its status is passed on.
+    return f_status_is_error(status) ? status : f_none;
+  }
+#endif // _di_fl_utf_seek_line_until_graph_
+
+#ifndef _di_fl_utf_seek_line_until_non_graph_
+  /**
+   * Advance location->start past placeholders and past characters for which f_utf_is_space_character() returns f_false.
+   *
+   * @return
+   *   f_none when the seek stopped on a character that is neither the placeholder nor reported as f_false.
+   *   f_none_on_eol when a skipped character is f_utf_character_eol.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   *   Any error status from f_utf_is_space_character() is returned unchanged.
+   */
+  f_return_status fl_utf_seek_line_until_non_graph(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character placeholder) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0 || location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0 || location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    f_status status = f_none;
+
+    for (;;) {
+      const f_utf_character current = buffer.string[location->start];
+
+      // a placeholder is skipped without being classified, so status keeps its previous value.
+      if (current != placeholder) {
+        status = f_utf_is_space_character(current);
+
+        if (status != f_false) break;
+      }
+
+      if (f_status_is_error(status)) {
+        return status;
+      }
+
+      if (current == f_utf_character_eol) {
+        return f_none_on_eol;
+      }
+
+      if (f_macro_utf_character_width_is(current) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) {
+        return f_none_on_eos;
+      }
+
+      if (location->start > location->stop) {
+        return f_none_on_stop;
+      }
+    } // for
+
+    // the classification itself may have failed, in which case its status is passed on.
+    return f_status_is_error(status) ? status : f_none;
+  }
+#endif // _di_fl_utf_seek_line_until_non_graph_
+
+#ifndef _di_fl_utf_seek_line_to_
+  /**
+   * Advance location->start until the character at that position equals seek_to_this, without leaving the current line.
+   *
+   * @return
+   *   f_none when seek_to_this is found.
+   *   f_none_on_eol when f_utf_character_eol is found first.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   */
+  f_return_status fl_utf_seek_line_to(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0 || location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0 || location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    for (;;) {
+      const f_utf_character current = buffer.string[location->start];
+
+      if (current == seek_to_this) {
+        return f_none;
+      }
+
+      if (current == f_utf_character_eol) {
+        return f_none_on_eol;
+      }
+
+      if (f_macro_utf_character_width_is(current) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) {
+        return f_none_on_eos;
+      }
+
+      if (location->start > location->stop) {
+        return f_none_on_stop;
+      }
+    } // for
+  }
+#endif // _di_fl_utf_seek_line_to_
+
+#ifndef _di_fl_utf_seek_line_to_char_
+  /**
+   * Advance location->start until the character at that position equals the 1-byte character seek_to_this, without leaving the current line.
+   *
+   * The byte is placed in the most significant byte of an f_utf_character before it is compared against the buffer.
+   *
+   * @return
+   *   f_none when seek_to_this is found.
+   *   f_none_on_eol when f_utf_character_eol is found first.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   */
+  f_return_status fl_utf_seek_line_to_char(const f_utf_string_dynamic buffer, f_utf_string_location *location, const char seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_status_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    // widen through unsigned char first: shifting the promoted (signed) int is undefined for bytes >= 0x80 and for negative char values.
+    const f_utf_character seek_to_character = ((f_utf_character) ((unsigned char) seek_to_this)) << 24;
+
+    while (buffer.string[location->start] != seek_to_character) {
+      if (buffer.string[location->start] == f_utf_character_eol) return f_none_on_eol;
+
+      if (f_macro_utf_character_width_is(buffer.string[location->start]) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
+    } // while
+
+    return f_none;
+  }
+#endif // _di_fl_utf_seek_line_to_char_
+
+#ifndef _di_fl_utf_string_seek_to_
+  /**
+   * Advance location->start until the character at that position equals seek_to_this.
+   *
+   * @return
+   *   f_none when seek_to_this is found.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   */
+  f_return_status fl_utf_string_seek_to(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0 || location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0 || location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    for (;;) {
+      const f_utf_character current = buffer.string[location->start];
+
+      if (current == seek_to_this) {
+        return f_none;
+      }
+
+      if (f_macro_utf_character_width_is(current) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) {
+        return f_none_on_eos;
+      }
+
+      if (location->start > location->stop) {
+        return f_none_on_stop;
+      }
+    } // for
+  }
+#endif // _di_fl_utf_string_seek_to_
+
+#ifndef _di_fl_utf_string_seek_to_char_
+  /**
+   * Advance location->start until the character at that position equals the 1-byte character seek_to_this.
+   *
+   * The byte is placed in the most significant byte of an f_utf_character before it is compared against the buffer.
+   *
+   * @return
+   *   f_none when seek_to_this is found.
+   *   f_none_on_eos when the start location reaches buffer.used.
+   *   f_none_on_stop when the start location passes location->stop.
+   *   f_invalid_utf (with error bit) when f_macro_utf_character_width_is() yields 1 for a skipped character.
+   *   f_invalid_parameter (with error bit) if a parameter is invalid.
+   */
+  f_return_status fl_utf_string_seek_to_char(const f_utf_string_dynamic buffer, f_utf_string_location *location, const char seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_status_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_status_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    // widen through unsigned char first: shifting the promoted (signed) int is undefined for bytes >= 0x80 and for negative char values.
+    const f_utf_character seek_to_character = ((f_utf_character) ((unsigned char) seek_to_this)) << 24;
+
+    while (buffer.string[location->start] != seek_to_character) {
+      if (f_macro_utf_character_width_is(buffer.string[location->start]) == 1) {
+        return f_status_set_error(f_invalid_utf);
+      }
+
+      location->start++;
+
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
+    } // while
+
+    return f_none;
+  }
+#endif // _di_fl_utf_string_seek_to_char_
+
+#ifndef _di_fl_utf_string_compare_
+  /**
+   * Compare two UTF-8 character strings while ignoring every f_utf_character_eos found in either of them.
+   *
+   * @return
+   *   f_equal_to when the strings match once the ignored characters are left out.
+   *   f_not_equal_to otherwise.
+   *   f_invalid_parameter (with error bit) if either length is not positive.
+   */
+  f_return_status fl_utf_string_compare(const f_utf_string string1, const f_utf_string string2, const f_utf_string_length length1, const f_utf_string_length length2) {
+    #ifndef _di_level_1_parameter_checking_
+      if (length1 <= 0) return f_status_set_error(f_invalid_parameter);
+      if (length2 <= 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    f_utf_string_length at1 = 0;
+    f_utf_string_length at2 = 0;
+
+    while (at1 < length1 && at2 < length2) {
+      // step over ignored characters in string1.
+      for (; at1 < length1; at1++) {
+        if (string1[at1] != f_utf_character_eos) break;
+      } // for
+
+      if (at1 == length1) break;
+
+      // step over ignored characters in string2.
+      for (; at2 < length2; at2++) {
+        if (string2[at2] != f_utf_character_eos) break;
+      } // for
+
+      if (at2 == length2) break;
+
+      if (string1[at1] != string2[at2]) {
+        return f_not_equal_to;
+      }
+
+      at1++;
+      at2++;
+    } // while
+
+    // whatever is left over in either string may only consist of ignored characters.
+    for (; at1 < length1; at1++) {
+      if (string1[at1] != f_utf_character_eos) return f_not_equal_to;
+    } // for
+
+    for (; at2 < length2; at2++) {
+      if (string2[at2] != f_utf_character_eos) return f_not_equal_to;
+    } // for
+
+    return f_equal_to;
+  }
+#endif // _di_fl_utf_string_compare_
+
+#ifndef _di_fl_utf_string_dynamic_compare_
+  /**
+   * Compare the used parts of two dynamic UTF-8 strings while ignoring every f_utf_character_eos found in either of them.
+   *
+   * @return
+   *   f_equal_to when the strings match once the ignored characters are left out.
+   *   f_not_equal_to otherwise.
+   *   f_invalid_parameter (with error bit) if either string has no used characters.
+   */
+  f_return_status fl_utf_string_dynamic_compare(const f_utf_string_dynamic string1, const f_utf_string_dynamic string2) {
+    #ifndef _di_level_1_parameter_checking_
+      if (string1.used <= 0) return f_status_set_error(f_invalid_parameter);
+      if (string2.used <= 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    f_utf_string_length at1 = 0;
+    f_utf_string_length at2 = 0;
+
+    while (at1 < string1.used && at2 < string2.used) {
+      // step over ignored characters in string1.
+      for (; at1 < string1.used; at1++) {
+        if (string1.string[at1] != f_utf_character_eos) break;
+      } // for
+
+      if (at1 == string1.used) break;
+
+      // step over ignored characters in string2.
+      for (; at2 < string2.used; at2++) {
+        if (string2.string[at2] != f_utf_character_eos) break;
+      } // for
+
+      if (at2 == string2.used) break;
+
+      if (string1.string[at1] != string2.string[at2]) {
+        return f_not_equal_to;
+      }
+
+      at1++;
+      at2++;
+    } // while
+
+    // whatever is left over in either string may only consist of ignored characters.
+    for (; at1 < string1.used; at1++) {
+      if (string1.string[at1] != f_utf_character_eos) return f_not_equal_to;
+    } // for
+
+    for (; at2 < string2.used; at2++) {
+      if (string2.string[at2] != f_utf_character_eos) return f_not_equal_to;
+    } // for
+
+    return f_equal_to;
+  }
+#endif // _di_fl_utf_string_dynamic_compare_
+
+#ifndef _di_fl_utf_string_compare_dynamic_partial_
+  /**
+   * Compare the inclusive ranges offset1 of string1 and offset2 of string2 while ignoring every f_utf_character_eos found in either range.
+   *
+   * The disable macro matches the one guarding the declaration in the header, so that definition and declaration are always removed together.
+   *
+   * @return
+   *   f_equal_to when the ranges match once the ignored characters are left out.
+   *   f_not_equal_to otherwise.
+   *   f_invalid_parameter (with error bit) if a string has no used characters, a range is reversed, or a range stops at or beyond the used length.
+   */
+  f_return_status fl_utf_string_compare_dynamic_partial(const f_utf_string_dynamic string1, const f_utf_string_dynamic string2, const f_utf_string_location offset1, const f_utf_string_location offset2) {
+    #ifndef _di_level_1_parameter_checking_
+      if (string1.used <= 0) return f_status_set_error(f_invalid_parameter);
+      if (string2.used <= 0) return f_status_set_error(f_invalid_parameter);
+
+      if (offset1.start > offset1.stop) return f_status_set_error(f_invalid_parameter);
+      if (offset2.start > offset2.stop) return f_status_set_error(f_invalid_parameter);
+
+      if (string1.used <= offset1.stop) return f_status_set_error(f_invalid_parameter);
+      if (string2.used <= offset2.stop) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    f_utf_string_length i1 = offset1.start;
+    f_utf_string_length i2 = offset2.start;
+
+    // the stop locations are inclusive, so the loops run up to one past them.
+    const f_utf_string_length stop1 = offset1.stop + 1;
+    const f_utf_string_length stop2 = offset2.stop + 1;
+
+    for (; i1 < stop1 && i2 < stop2; i1++, i2++) {
+      // skip past NULLs in string1.
+      while (i1 < stop1 && string1.string[i1] == f_utf_character_eos) i1++;
+      if (i1 == stop1) break;
+
+      // skip past NULLs in string2.
+      while (i2 < stop2 && string2.string[i2] == f_utf_character_eos) i2++;
+      if (i2 == stop2) break;
+
+      if (string1.string[i1] != string2.string[i2]) return f_not_equal_to;
+    } // for
+
+    // only return f_equal_to if all remaining characters are NULL.
+    while (i1 < stop1) {
+      if (string1.string[i1] != f_utf_character_eos) return f_not_equal_to;
+      i1++;
+    } // while
+
+    while (i2 < stop2) {
+      if (string2.string[i2] != f_utf_character_eos) return f_not_equal_to;
+      i2++;
+    } // while
+
+    return f_equal_to;
+  }
+#endif // _di_fl_utf_string_compare_dynamic_partial_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
--- /dev/null
+/**
+ * FLL - Level 1
+ *
+ * Project: Utf
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * Provides UTF-8 character manipulation and processing capabilities.
+ */
+// the guard macro must be unique to this header, otherwise including level_1/strings.h first would hide everything declared here.
+#ifndef _FL_utf_h
+#define _FL_utf_h
+
+// libc includes
+#include <ctype.h>
+#include <string.h>
+
+// fll includes
+#include <level_0/status.h>
+#include <level_0/memory.h>
+#include <level_0/strings.h>
+#include <level_0/types.h>
+#include <level_0/utf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Allocate a new UTF-8 string from the provided range in the buffer.
+ *
+ * The start and stop locations of the range are both inclusive.
+ *
+ * @param buffer
+ *   The buffer to rip from.
+ * @param location
+ *   A range within the buffer representing the string to rip.
+ * @param result
+ *   The new string, which will be allocated or reallocated as necessary.
+ *   On success, its used length is set to the number of characters ripped.
+ *
+ * @return
+ *   f_none on success.
+ *   f_no_data if nothing to rip, no allocations or reallocations are performed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_allocation_error (with error bit) on memory allocation error.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
+#ifndef _di_fl_rip_utf_string_
+ extern f_return_status fl_rip_utf_string(const f_utf_string_dynamic buffer, const f_utf_string_location location, f_utf_string_dynamic *result);
+#endif // _di_fl_rip_utf_string_
+
+/**
+ * Increment buffer location until a graph character or an EOL is matched.
+ *
+ * This will ignore the UTF-8 BOM.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param placeholder
+ *   A UTF-8 character representing a placeholder to ignore (may be NULL).
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   Errors reported by the graph character check are returned unchanged.
+ */
+#ifndef _di_fl_utf_seek_line_until_graph_
+ extern f_return_status fl_utf_seek_line_until_graph(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character placeholder);
+#endif // _di_fl_utf_seek_line_until_graph_
+
+/**
+ * Increment buffer location until a non-graph character or an EOL is matched.
+ *
+ * The implementation treats a character as non-graph when the space character check does not return f_false for it.
+ *
+ * This will ignore the UTF-8 BOM.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param placeholder
+ *   A UTF-8 character representing a placeholder to ignore (may be NULL).
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   Errors reported by the space character check are returned unchanged.
+ */
+#ifndef _di_fl_utf_seek_line_until_non_graph_
+ extern f_return_status fl_utf_seek_line_until_non_graph(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character placeholder);
+#endif // _di_fl_utf_seek_line_until_non_graph_
+
+/**
+ * Seek the buffer location forward until the UTF-8 character or EOL is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A UTF-8 character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_seek_line_to_char()
+ */
+#ifndef _di_fl_utf_seek_line_to_
+ extern f_return_status fl_utf_seek_line_to(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_utf_seek_line_to_
+
+/**
+ * Seek the buffer location forward until the 1-byte wide character or EOL is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A single-width non-UTF-8 character.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_seek_line_to()
+ */
+#ifndef _di_fl_utf_seek_line_to_char_
+  extern f_return_status fl_utf_seek_line_to_char(const f_utf_string_dynamic buffer, f_utf_string_location *location, const char seek_to_this);
+#endif // _di_fl_utf_seek_line_to_char_
+
+/**
+ * Seek the buffer location forward until the UTF-8 character (up to 4-byte wide) is reached.
+ *
+ * Unlike fl_utf_seek_line_to(), this does not stop at EOL.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A UTF-8 character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_string_seek_to_char()
+ */
+#ifndef _di_fl_utf_string_seek_to_
+ extern f_return_status fl_utf_string_seek_to(const f_utf_string_dynamic buffer, f_utf_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_utf_string_seek_to_
+
+/**
+ * Seek the buffer location forward until the character (1-byte wide) is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A single-width non-UTF-8 character.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_utf (with error bit) if a character in the buffer is an invalid UTF-8 character.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_string_seek_to()
+ */
+#ifndef _di_fl_utf_string_seek_to_char_
+  extern f_return_status fl_utf_string_seek_to_char(const f_utf_string_dynamic buffer, f_utf_string_location *location, const char seek_to_this);
+#endif // _di_fl_utf_string_seek_to_char_
+
+/**
+ * Compare two UTF-8 strings, similar to strncmp().
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare.
+ * @param string2
+ *   String to compare.
+ * @param length1
+ *   Length of string1, must be greater than 0.
+ * @param length2
+ *   Length of string2, must be greater than 0.
+ *
+ * @return
+ *   f_equal_to when both strings equal.
+ *   f_not_equal_to when both strings do not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_string_dynamic_compare()
+ * @see fl_utf_string_compare_dynamic_partial()
+ */
+#ifndef _di_fl_utf_string_compare_
+ extern f_return_status fl_utf_string_compare(const f_utf_string string1, const f_utf_string string2, const f_utf_string_length length1, const f_utf_string_length length2);
+#endif // _di_fl_utf_string_compare_
+
+/**
+ * Compare two dynamic UTF-8 strings, similar to strncmp().
+ *
+ * Only the used part of each string is compared.
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare, must have at least one used character.
+ * @param string2
+ *   String to compare, must have at least one used character.
+ *
+ * @return
+ *   f_equal_to when both strings equal.
+ *   f_not_equal_to when both strings do not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_string_compare()
+ * @see fl_utf_string_compare_dynamic_partial()
+ */
+#ifndef _di_fl_utf_string_dynamic_compare_
+ extern f_return_status fl_utf_string_dynamic_compare(const f_utf_string_dynamic string1, const f_utf_string_dynamic string2);
+#endif // _di_fl_utf_string_dynamic_compare_
+
+/**
+ * Compare two dynamic UTF-8 strings, similar to strncmp(), but restricted to the given ranges.
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare.
+ * @param string2
+ *   String to compare.
+ * @param offset1
+ *   A range within the string1 to restrict the comparison to.
+ *   The start and stop locations are inclusive and the stop location must be less than the used length of string1.
+ * @param offset2
+ *   A range within the string2 to restrict the comparison to.
+ *   The start and stop locations are inclusive and the stop location must be less than the used length of string2.
+ *
+ * @return
+ *   f_equal_to when both strings equal.
+ *   f_not_equal_to when both strings do not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see fl_utf_string_compare()
+ * @see fl_utf_string_dynamic_compare()
+ */
+#ifndef _di_fl_utf_string_compare_dynamic_partial_
+ extern f_return_status fl_utf_string_compare_dynamic_partial(const f_utf_string_dynamic string1, const f_utf_string_dynamic string2, const f_utf_string_location offset1, const f_utf_string_location offset2);
+#endif // _di_fl_utf_string_compare_dynamic_partial_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // end of the level_1/utf.h include guard
--- /dev/null
+f_types
+f_errors
+f_memory
+f_strings
+f_utf
--- /dev/null
+# fss-0000
+
+project_name fl_utf
+project_level 1
+
+version_major 0
+version_minor 5
+version_micro 0
+
+build_compiler gcc
+build_linker ar
+build_libraries -lc
+build_libraries_fll -lf_memory -lf_utf
+build_sources_library utf.c
+build_sources_program
+build_sources_headers utf.h
+build_sources_bash
+build_sources_settings
+build_shared yes
+build_static yes
+
+defines_all
+defines_static
+defines_shared
+
+flags_all -z now
+flags_shared
+flags_static
+flags_library -fPIC
+flags_program -fPIE