From: Kevin Day Date: Wed, 28 Aug 2019 04:59:00 +0000 (-0500) Subject: Update: add more support for UTF-8 and improve effected functions X-Git-Tag: 0.5.0~477 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=e7b485e59abad9fd03263d969fb999f72bed1f24;p=fll Update: add more support for UTF-8 and improve effected functions Use a 32-bit unsigned integer instead of a structure of 4 characters. Add -lf_utf to dependencies on updated level projects. Be more thorough when implementing simple fl_seek_* functions. Support both processing char as well as f_utf_character in string processing. Continue updating code documentation. --- diff --git a/level_0/f_serialized/c/serialized.h b/level_0/f_serialized/c/serialized.h index 8872379..fa97c99 100644 --- a/level_0/f_serialized/c/serialized.h +++ b/level_0/f_serialized/c/serialized.h @@ -7,7 +7,7 @@ * * Provides string processing functionality for what is to be defined as a serialized string. * Serialized strings are strings that can hold multiple values in a single variable. - * An example of serialized content is the PATH environment variable where ":" separates data.. + * An example of serialized content is the PATH environment variable where ":" separates data. */ #ifndef _F_serialized_h #define _F_serialized_h @@ -34,7 +34,8 @@ extern "C" { #endif // _di_f_serialized_splitters_ #ifndef _di_f_serialized_default_allocation_step_ - #define f_serialized_default_allocation_step f_memory_default_allocation_step + // provide a UTF-8 friendly allocation step. 
+ #define f_serialized_default_allocation_step 4 #endif // _di_f_serialized_default_allocation_step_ #ifdef __cplusplus diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c index 3a947dc..10c61fa 100644 --- a/level_0/f_utf/c/utf.c +++ b/level_0/f_utf/c/utf.c @@ -4,8 +4,8 @@ extern "C" { #endif -#ifndef _di_f_utf_is_bom_string_ - f_return_status f_utf_is_bom_string(const f_string character, const f_u_short max_width) { +#ifndef _di_f_utf_is_bom_ + f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -28,10 +28,10 @@ extern "C" { return f_false; } -#endif // _di_f_utf_is_bom_string_ +#endif // _di_f_utf_is_bom_ -#ifndef _di_f_utf_is_graph_string_ - f_return_status f_utf_is_graph_string(const f_string character, const f_u_short max_width) { +#ifndef _di_f_utf_is_graph_ + f_return_status f_utf_is_graph(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -48,20 +48,20 @@ extern "C" { // for now, just assume that any non-whitespace, non-substitute utf-8 character is a graph. 
- if (f_utf_is_space_string(character, max_width) == f_true) { + if (f_utf_is_space(character, max_width) == f_true) { return f_false; } - if (f_utf_is_bom_string(character, max_width) == f_true) { + if (f_utf_is_bom(character, max_width) == f_true) { return f_false; } return f_true; } -#endif // _di_f_utf_is_graph_string_ +#endif // _di_f_utf_is_graph_ -#ifndef _di_f_utf_is_space_string_ - f_return_status f_utf_is_space_string(const f_string character, const f_u_short max_width) { +#ifndef _di_f_utf_is_space_ + f_return_status f_utf_is_space(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -190,10 +190,10 @@ extern "C" { return f_false; } -#endif // _di_f_utf_is_space_string_ +#endif // _di_f_utf_is_space_ -#ifndef _di_f_utf_is_substitute_string_ - f_return_status f_utf_is_substitute_string(const f_string character, const f_u_short max_width) { +#ifndef _di_f_utf_is_substitute_ + f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -238,10 +238,10 @@ extern "C" { return f_false; } -#endif // _di_f_utf_is_substitute_string_ +#endif // _di_f_utf_is_substitute_ -#ifndef _di_f_utf_is_whitespace_string_ - f_return_status f_utf_is_whitespace_string(const f_string character, const f_u_short max_width) { +#ifndef _di_f_utf_is_whitespace_ + f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -350,20 +350,12 @@ extern "C" { return f_false; } -#endif // _di_f_utf_is_whitespace_string_ +#endif // _di_f_utf_is_whitespace_ #ifndef 
_di_f_utf_is_bom_character_ f_return_status f_utf_is_bom_character(const f_utf_character character) { - f_u_short width = f_macro_utf_byte_width(character.byte_1); - - if (width == 1) { - return f_false; - } - - if (width == 3) { - if (character.byte_1 == f_utf_bom[0] && character.byte_2 == f_utf_bom[1] && character.byte_3 == f_utf_bom[2]) { - return f_true; - } + if (character == f_macro_utf_character_mask_bom) { + return f_true; } return f_false; @@ -372,7 +364,7 @@ extern "C" { #ifndef _di_f_utf_is_graph_character_ f_return_status f_utf_is_graph_character(const f_utf_character character) { - f_u_short width = f_macro_utf_byte_width(character.byte_1); + f_u_short width = f_macro_utf_character_width_is(character); if (width == 0) { return f_false; @@ -394,26 +386,28 @@ extern "C" { #ifndef _di_f_utf_is_space_character_ f_return_status f_utf_is_space_character(const f_utf_character character) { - f_u_short width = f_macro_utf_byte_width(character.byte_1); + f_u_short width = f_macro_utf_character_width(character); if (width == 1) { return f_false; } if (width == 2) { - if (character.byte_1 == f_utf_space_no_break[0] && character.byte_2 == f_utf_space_no_break[1]) { + char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) }; + + if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) { return f_true; } - if (character.byte_1 == f_utf_space_line_feed_reverse[0] && character.byte_2 == f_utf_space_line_feed_reverse[1]) { + if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) { return f_true; } - if (character.byte_1 == f_utf_space_line_next[0] && character.byte_2 == f_utf_space_line_next[1]) { + if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) { return f_true; } - if (character.byte_1 == f_utf_substitute_middle_dot[0] && character.byte_2 == f_utf_substitute_middle_dot[1]) { + if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == 
f_utf_substitute_middle_dot[1]) { return f_true; } @@ -421,91 +415,93 @@ extern "C" { } if (width == 3) { - if (character.byte_1 == f_utf_space_no_break_narrow[0] && character.byte_2 == f_utf_space_no_break_narrow[1] && character.byte_3 == f_utf_space_no_break_narrow[2]) { + char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) }; + + if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en[0] && character.byte_2 == f_utf_space_en[1] && character.byte_3 == f_utf_space_en[2]) { + if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) { + if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) { + if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em[0] && character.byte_2 == f_utf_space_em[1] && character.byte_3 == f_utf_space_em[2]) { + if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_quad[0] && character.byte_2 == f_utf_space_em_quad[1] && character.byte_3 == f_utf_space_em_quad[2]) { + if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] == f_utf_space_em_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_three[0] && character.byte_2 == 
f_utf_space_em_per_three[1] && character.byte_3 == f_utf_space_em_per_three[2]) { + if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_four[0] && character.byte_2 == f_utf_space_em_per_four[1] && character.byte_3 == f_utf_space_em_per_four[2]) { + if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_six[0] && character.byte_2 == f_utf_space_em_per_six[1] && character.byte_3 == f_utf_space_em_per_six[2]) { + if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) { return f_true; } - if (character.byte_1 == f_utf_space_figure[0] && character.byte_2 == f_utf_space_figure[1] && character.byte_3 == f_utf_space_figure[2]) { + if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) { return f_true; } - if (character.byte_1 == f_utf_space_punctuation[0] && character.byte_2 == f_utf_space_punctuation[1] && character.byte_3 == f_utf_space_punctuation[2]) { + if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) { return f_true; } - if (character.byte_1 == f_utf_space_thin[0] && character.byte_2 == f_utf_space_thin[1] && character.byte_3 == f_utf_space_thin[2]) { + if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) { return f_true; } - if (character.byte_1 == f_utf_space_hair[0] && character.byte_2 == f_utf_space_hair[1] && character.byte_3 == f_utf_space_hair[2]) { + if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == f_utf_space_hair[2]) { return f_true; } - if (character.byte_1 == f_utf_space_separator_line[0] && character.byte_2 == 
f_utf_space_separator_line[1] && character.byte_3 == f_utf_space_separator_line[2]) { + if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) { return f_true; } - if (character.byte_1 == f_utf_space_separator_paragraph[0] && character.byte_2 == f_utf_space_separator_paragraph[1] && character.byte_3 == f_utf_space_separator_paragraph[2]) { + if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) { return f_true; } - if (character.byte_1 == f_utf_space_ogham[0] && character.byte_2 == f_utf_space_ogham[1] && character.byte_3 == f_utf_space_ogham[2]) { + if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) { return f_true; } - if (character.byte_1 == f_utf_space_ideographic[0] && character.byte_2 == f_utf_space_ideographic[1] && character.byte_3 == f_utf_space_ideographic[2]) { + if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) { return f_true; } - if (character.byte_1 == f_utf_space_medium_mathematical[0] && character.byte_2 == f_utf_space_medium_mathematical[1] && character.byte_3 == f_utf_space_medium_mathematical[2]) { + if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_symbol_blank[0] && character.byte_2 == f_utf_substitute_symbol_blank[1] && character.byte_3 == f_utf_substitute_symbol_blank[2]) { + if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_symbol_space[0] && character.byte_2 == f_utf_substitute_symbol_space[1] && character.byte_3 == 
f_utf_substitute_symbol_space[2]) { + if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_open_box[0] && character.byte_2 == f_utf_substitute_open_box[1] && character.byte_3 == f_utf_substitute_open_box[2]) { + if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_open_box_shouldered[0] && character.byte_2 == f_utf_substitute_open_box_shouldered[1] && character.byte_3 == f_utf_substitute_open_box_shouldered[2]) { + if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) { return f_true; } @@ -518,14 +514,16 @@ extern "C" { #ifndef _di_f_utf_is_substitute_character_ f_return_status f_utf_is_substitute_character(const f_utf_character character) { - f_u_short width = f_macro_utf_byte_width(character.byte_1); + f_u_short width = f_macro_utf_character_width(character); if (width == 1) { return f_false; } if (width == 2) { - if (character.byte_1 == f_utf_substitute_middle_dot[0] && character.byte_2 == f_utf_substitute_middle_dot[1]) { + char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) }; + + if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == f_utf_substitute_middle_dot[1]) { return f_true; } @@ -533,19 +531,21 @@ extern "C" { } if (width == 3) { - if (character.byte_1 == f_utf_substitute_symbol_blank[0] && character.byte_2 == f_utf_substitute_symbol_blank[1] && character.byte_3 == f_utf_substitute_symbol_blank[2]) { + char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) }; + + if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == 
f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_symbol_space[0] && character.byte_2 == f_utf_substitute_symbol_space[1] && character.byte_3 == f_utf_substitute_symbol_space[2]) { + if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_open_box[0] && character.byte_2 == f_utf_substitute_open_box[1] && character.byte_3 == f_utf_substitute_open_box[2]) { + if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) { return f_true; } - if (character.byte_1 == f_utf_substitute_open_box_shouldered[0] && character.byte_2 == f_utf_substitute_open_box_shouldered[1] && character.byte_3 == f_utf_substitute_open_box_shouldered[2]) { + if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) { return f_true; } @@ -558,22 +558,24 @@ extern "C" { #ifndef _di_f_utf_is_whitespace_character_ f_return_status f_utf_is_whitespace_character(const f_utf_character character) { - f_u_short width = f_macro_utf_byte_width(character.byte_1); + f_u_short width = f_macro_utf_character_width(character); if (width == 1) { return f_false; } if (width == 2) { - if (character.byte_1 == f_utf_space_no_break[0] && character.byte_2 == f_utf_space_no_break[1]) { + char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) }; + + if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) { return f_true; } - if (character.byte_1 == f_utf_space_line_feed_reverse[0] && character.byte_2 == f_utf_space_line_feed_reverse[1]) { + if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) { return f_true; } - 
if (character.byte_1 == f_utf_space_line_next[0] && character.byte_2 == f_utf_space_line_next[1]) { + if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) { return f_true; } @@ -581,75 +583,77 @@ extern "C" { } if (width == 3) { - if (character.byte_1 == f_utf_space_no_break_narrow[0] && character.byte_2 == f_utf_space_no_break_narrow[1] && character.byte_3 == f_utf_space_no_break_narrow[2]) { + char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) }; + + if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en[0] && character.byte_2 == f_utf_space_en[1] && character.byte_3 == f_utf_space_en[2]) { + if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) { + if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) { + if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em[0] && character.byte_2 == f_utf_space_em[1] && character.byte_3 == f_utf_space_em[2]) { + if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_quad[0] && character.byte_2 == f_utf_space_em_quad[1] && character.byte_3 == f_utf_space_em_quad[2]) { + if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] 
== f_utf_space_em_quad[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_three[0] && character.byte_2 == f_utf_space_em_per_three[1] && character.byte_3 == f_utf_space_em_per_three[2]) { + if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_four[0] && character.byte_2 == f_utf_space_em_per_four[1] && character.byte_3 == f_utf_space_em_per_four[2]) { + if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) { return f_true; } - if (character.byte_1 == f_utf_space_em_per_six[0] && character.byte_2 == f_utf_space_em_per_six[1] && character.byte_3 == f_utf_space_em_per_six[2]) { + if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) { return f_true; } - if (character.byte_1 == f_utf_space_figure[0] && character.byte_2 == f_utf_space_figure[1] && character.byte_3 == f_utf_space_figure[2]) { + if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) { return f_true; } - if (character.byte_1 == f_utf_space_punctuation[0] && character.byte_2 == f_utf_space_punctuation[1] && character.byte_3 == f_utf_space_punctuation[2]) { + if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) { return f_true; } - if (character.byte_1 == f_utf_space_thin[0] && character.byte_2 == f_utf_space_thin[1] && character.byte_3 == f_utf_space_thin[2]) { + if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) { return f_true; } - if (character.byte_1 == f_utf_space_hair[0] && character.byte_2 == f_utf_space_hair[1] && character.byte_3 == f_utf_space_hair[2]) { + if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == 
f_utf_space_hair[2]) { return f_true; } - if (character.byte_1 == f_utf_space_separator_line[0] && character.byte_2 == f_utf_space_separator_line[1] && character.byte_3 == f_utf_space_separator_line[2]) { + if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) { return f_true; } - if (character.byte_1 == f_utf_space_separator_paragraph[0] && character.byte_2 == f_utf_space_separator_paragraph[1] && character.byte_3 == f_utf_space_separator_paragraph[2]) { + if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) { return f_true; } - if (character.byte_1 == f_utf_space_ogham[0] && character.byte_2 == f_utf_space_ogham[1] && character.byte_3 == f_utf_space_ogham[2]) { + if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) { return f_true; } - if (character.byte_1 == f_utf_space_ideographic[0] && character.byte_2 == f_utf_space_ideographic[1] && character.byte_3 == f_utf_space_ideographic[2]) { + if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) { return f_true; } - if (character.byte_1 == f_utf_space_medium_mathematical[0] && character.byte_2 == f_utf_space_medium_mathematical[1] && character.byte_3 == f_utf_space_medium_mathematical[2]) { + if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) { return f_true; } @@ -660,43 +664,49 @@ extern "C" { } #endif // _di_f_utf_is_whitespace_character_ -#ifndef _di_f_utf_string_to_character_ - f_return_status f_utf_string_to_character(const f_string character_string, const f_u_short max_width, f_utf_character *utf_character) { +#ifndef _di_f_utf_char_to_character_ + f_return_status f_utf_char_to_character(const f_string character, const f_u_short 
max_width, f_utf_character *utf_character) { #ifndef _di_level_0_parameter_checking_ if (max_width < 1) return f_error_set_error(f_invalid_parameter); + if (utf_character == 0) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ - f_u_short width = f_macro_utf_byte_width_is(character_string[0]); + f_u_short width = f_macro_utf_byte_width(*character); + + if (width == 1) { + *utf_character = f_macro_utf_character_from_char_1(character[0]); + return f_none; + } - if (width >= max_width) { + if (width > max_width) { return f_error_set_error(f_failure); } memset(utf_character, 0, sizeof(f_utf_character)); - utf_character->byte_1 = character_string[0]; + *utf_character |= f_macro_utf_character_to_char_1(character[0]); if (width < 2) { return f_none; } - utf_character->byte_2 = character_string[1]; + *utf_character |= f_macro_utf_character_to_char_2(character[1]); if (width == 2) { return f_none; } - utf_character->byte_3 = character_string[2]; + *utf_character |= f_macro_utf_character_to_char_3(character[2]); if (width == 3) { return f_none; } - utf_character->byte_4 = character_string[3]; + *utf_character |= f_macro_utf_character_to_char_4(character[3]); return f_none; } -#endif // _di_f_utf_string_to_character_ +#endif // _di_f_utf_char_to_character_ #ifdef __cplusplus } // extern "C" diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h index e3569ec..29ec202 100644 --- a/level_0/f_utf/c/utf.h +++ b/level_0/f_utf/c/utf.h @@ -61,28 +61,6 @@ extern "C" { #endif // _di_f_utf_bom_ /** - * Provide a basic UTF-8 character. - * - * This is intended to be used so that a single path parameter can be passed to a function instead of an array of characters. - */ -#ifndef _di_f_utf_character_ - typedef struct { - char byte_1; - char byte_2; - char byte_3; - char byte_4; - } f_utf_character; - - #define f_utf_character_initialize \ - { \ - '\0', \ - '\0', \ - '\0', \ - '\0', \ - } -#endif // _di_f_utf_char_ - -/** * Define the UTF-8 bytes. 
* * The bytes are for checking a single 8-bit character value (specifically, checking the first bits). @@ -94,7 +72,6 @@ extern "C" { * The f_macro_utf_byte_is_* macros are used to determine a width of the character (either 1, 2, 3, or 4, respectively). * * The f_macro_utf_byte_width macro determines a width of the character. - * * The f_macro_utf_byte_width_is is identical to f_macro_utf_byte_width, except it returns 0 when character is not UTF-8. */ #ifndef _di_f_utf_byte_ @@ -120,6 +97,53 @@ extern "C" { #endif // _di_f_utf_byte_ /** + * Provide a basic UTF-8 character as a single 4-byte variable. + * + * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character. + * + * The byte structure is intended to be read left to right. + * + * The f_macro_utf_character_mask_byte_* are used to get the entire character set for a given width. + * + * The f_macro_utf_character_mask_char_* are used to get a specific UTF-8 block as a single character range. + * + * The f_macro_utf_character_to_char_* are used to convert a f_utf_character into a char, for a given 8-bit block. + * + * The f_macro_utf_character_from_char_* are used to convert a char into part of a f_utf_character, for a given 8-bit block. + * + * The f_macro_utf_character_width is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width. + * The f_macro_utf_character_width_is is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width_is. 
+ */ +#ifndef _di_f_utf_character_ + typedef uint32_t f_utf_character; + + #define f_macro_utf_character_mask_bom 0xefbbbf00 // 1110 1111, 1011 1011, 1011 1111, 0000 0000 + + #define f_macro_utf_character_mask_byte_1 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define f_macro_utf_character_mask_byte_2 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000 + #define f_macro_utf_character_mask_byte_3 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000 + #define f_macro_utf_character_mask_byte_4 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 + + #define f_macro_utf_character_mask_char_1 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define f_macro_utf_character_mask_char_2 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 + #define f_macro_utf_character_mask_char_3 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 + #define f_macro_utf_character_mask_char_4 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 + + #define f_macro_utf_character_to_char_1(character) ((f_macro_utf_character_mask_char_1 & character) >> 24) // grab first byte. + #define f_macro_utf_character_to_char_2(character) ((f_macro_utf_character_mask_char_2 & character) >> 16) // grab second byte. + #define f_macro_utf_character_to_char_3(character) ((f_macro_utf_character_mask_char_3 & character) >> 8) // grab third byte. + #define f_macro_utf_character_to_char_4(character) (f_macro_utf_character_mask_char_4 & character) // grab fourth byte. + + #define f_macro_utf_character_from_char_1(character) (character << 24) // shift the first byte. + #define f_macro_utf_character_from_char_2(character) (character << 16) // shift the second byte. + #define f_macro_utf_character_from_char_3(character) (character << 8) // shift the third byte. + #define f_macro_utf_character_from_char_4(character) (character) // shift the fourth byte. 
+ + #define f_macro_utf_character_width(character) (f_macro_utf_byte_width(f_macro_utf_character_to_char_1(character))) + #define f_macro_utf_character_width_is(character) (f_macro_utf_byte_width_is(f_macro_utf_character_to_char_1(character))) +#endif // _di_f_utf_character_ + +/** * Define the UTF-8 general whitespace codes. * * These are integers representing character codes that represent types of spaces. @@ -223,9 +247,9 @@ extern "C" { * f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_f_utf_is_bom_string_ - extern f_return_status f_utf_is_bom_string(const f_string character, const f_u_short max_width); -#endif // _di_f_utf_is_bom_string_ +#ifndef _di_f_utf_is_bom_ + extern f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width); +#endif // _di_f_utf_is_bom_ /** * Check to see if the entire byte block of the character is a UTF-8 printable character. @@ -245,9 +269,9 @@ extern "C" { * f_maybe (with error bit) if this could be a graph but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_f_utf_is_graph_string_ - extern f_return_status f_utf_is_graph_string(const f_string character, const f_u_short max_width); -#endif // _di_f_utf_is_graph_string_ +#ifndef _di_f_utf_is_graph_ + extern f_return_status f_utf_is_graph(const f_string character, const f_u_short max_width); +#endif // _di_f_utf_is_graph_ /** * Check to see if the entire byte block of the character is a UTF-8 whitespace or substitute character. @@ -267,9 +291,9 @@ extern "C" { * f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. 
*/ -#ifndef _di_f_utf_is_space_string_ - extern f_return_status f_utf_is_space_string(const f_string character, const f_u_short max_width); -#endif // _di_f_utf_is_space_string_ +#ifndef _di_f_utf_is_space_ + extern f_return_status f_utf_is_space(const f_string character, const f_u_short max_width); +#endif // _di_f_utf_is_space_ /** * Check to see if the entire byte block of the character is a UTF-8 whitespace substitute character. @@ -289,9 +313,9 @@ extern "C" { * f_maybe (with error bit) if this could be a substitute but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_f_utf_is_substitute_string_ - extern f_return_status f_utf_is_substitute_string(const f_string character, const f_u_short max_width); -#endif // _di_f_utf_is_substitute_string_ +#ifndef _di_f_utf_is_substitute_ + extern f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width); +#endif // _di_f_utf_is_substitute_ /** * Check to see if the entire byte block of the character is a UTF-8 general whitespace character. @@ -311,9 +335,9 @@ extern "C" { * f_maybe (with error bit) if this could be a whitespace but width is not long enough. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_f_utf_is_whitespace_string_ - extern f_return_status f_utf_is_whitespace_string(const f_string character, const f_u_short max_width); -#endif // _di_f_utf_is_whitespace_string_ +#ifndef _di_f_utf_is_whitespace_ + extern f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width); +#endif // _di_f_utf_is_whitespace_ /** * Check to see if the entire byte block of the character is a UTF-8 BOM. @@ -404,7 +428,7 @@ extern "C" { * * This will also convert ASCII characters. * - * @param character_string + * @param character * The character string to be converted to the f_utf_character type. * There must be enough space allocated to convert against, as limited by max_width. 
* @param max_width @@ -419,9 +443,9 @@ extern "C" { * f_failure (with error bit) if width is not long enough to convert. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_f_utf_string_to_character_ - extern f_return_status f_utf_string_to_character(const f_string character_string, const f_u_short max_width, f_utf_character *utf_character); -#endif // _di_f_utf_string_to_character_ +#ifndef _di_f_utf_char_to_character_ + extern f_return_status f_utf_char_to_character(const f_string character, const f_u_short max_width, f_utf_character *utf_character); +#endif // _di_f_utf_char_to_character_ #ifdef __cplusplus } // extern "C" diff --git a/level_1/fl_fss/c/fss.c b/level_1/fl_fss/c/fss.c index 73edd32..ac42d11 100644 --- a/level_1/fl_fss/c/fss.c +++ b/level_1/fl_fss/c/fss.c @@ -49,7 +49,7 @@ extern "C" { // A single UTF-8 BOM is allowed to exist before the valid FSS identifier. if (buffer.used > 3) { - f_status status = f_utf_is_bom_string(buffer.string, 4); + f_status status = f_utf_is_bom(buffer.string, 4); if (f_error_is_error(status)) { return f_error_set_error(fl_fss_no_header); @@ -297,7 +297,7 @@ extern "C" { max_width = buffer.used - input.start; } - f_status status = f_utf_is_space_string(buffer.string + input.start, max_width); + f_status status = f_utf_is_space(buffer.string + input.start, max_width); if (f_error_is_error(status)) { return status; @@ -336,7 +336,7 @@ extern "C" { max_width = buffer.used - input.start; } - f_status status = f_utf_is_space_string(buffer.string + input.start, max_width); + f_status status = f_utf_is_space(buffer.string + input.start, max_width); if (f_error_is_error(status)) { return status; @@ -370,8 +370,8 @@ extern "C" { if (buffer.string[input->start] != f_fss_delimit_placeholder) { max_width = (input->stop - input->start) + 1; - if (f_utf_is_space_string(buffer.string +input->start, max_width) != f_true) { - if (f_utf_is_bom_string(buffer.string + input->start, max_width) != f_true) { + if 
(f_utf_is_space(buffer.string +input->start, max_width) != f_true) { + if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) { break; } } @@ -402,8 +402,8 @@ extern "C" { if (buffer.string[input->start] != f_fss_delimit_placeholder) { max_width = (input->stop - input->start) + 1; - if (f_utf_is_space_string(buffer.string + input->start, max_width) != f_true) { - if (f_utf_is_bom_string(buffer.string + input->start, max_width) != f_true) { + if (f_utf_is_space(buffer.string + input->start, max_width) != f_true) { + if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) { break; } } diff --git a/level_1/fl_fss/c/fss.h b/level_1/fl_fss/c/fss.h index 43a896c..c781db2 100644 --- a/level_1/fl_fss/c/fss.h +++ b/level_1/fl_fss/c/fss.h @@ -47,10 +47,10 @@ extern "C" { * * @return * f_none on success. - * f_none_on_stop if the stop point is reached before all steps are completed. + * f_none_on_stop if the stop location is reached before all steps are completed. * f_none_on_eos if the end of buffer is reached before all steps are completed. * f_invalid_parameter (with error bit) if a parameter is invalid. - * f_incomplete_utf_on_stop (with error bit) if the stop point is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. * f_incomplete_utf_on_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed. */ #ifndef _di_fl_fss_decrement_buffer_ @@ -118,10 +118,10 @@ extern "C" { * * @return * f_none on success. - * f_none_on_stop if the stop point is reached before all steps are completed. + * f_none_on_stop if the stop location is reached before all steps are completed. * f_none_on_eos if the end of buffer is reached before all steps are completed. * f_invalid_parameter (with error bit) if a parameter is invalid. 
- * f_incomplete_utf_on_stop (with error bit) if the stop point is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. * f_incomplete_utf_on_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed. */ #ifndef _di_fl_fss_increment_buffer_ diff --git a/level_1/fl_fss/c/fss_basic.c b/level_1/fl_fss/c/fss_basic.c index cdd12cf..3a3adb3 100644 --- a/level_1/fl_fss/c/fss_basic.c +++ b/level_1/fl_fss/c/fss_basic.c @@ -435,7 +435,7 @@ extern "C" { ++input->start; } while (f_true); - // Save the stop point + // Save the stop location found->array[found->used].stop = input->start - 1; found->used++; diff --git a/level_1/fl_serialized/c/serialized.c b/level_1/fl_serialized/c/serialized.c index b81da09..4d77ce5 100644 --- a/level_1/fl_serialized/c/serialized.c +++ b/level_1/fl_serialized/c/serialized.c @@ -12,9 +12,8 @@ extern "C" { f_status status = f_none; - if (serialized->used + value.used + 1 >= serialized->size) { - f_resize_dynamic_string(status, (*serialized), serialized->size + value.used + f_serialized_default_allocation_step); + f_resize_dynamic_string(status, (*serialized), serialized->size + value.used + 1); if (f_error_is_error(status)) return status; } @@ -33,8 +32,8 @@ extern "C" { } #endif // _di_fl_serialize_simple_ -#ifndef _di_fl_unserialize_simple_ - f_return_status fl_unserialize_simple(const f_dynamic_string serialized, f_string_locations *locations) { +#ifndef _di_fl_unserialize_simple_map_ + f_return_status fl_unserialize_simple_map(const f_dynamic_string serialized, f_string_locations *locations) { #ifndef _di_level_0_parameter_checking_ if (locations == 0) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -44,15 +43,20 @@ extern "C" { f_array_length i = 0; f_array_length start = 0; - while (i <= serialized.used) { + 
f_u_short width = 0; + + while (i < serialized.used) { + width = f_macro_utf_byte_width(serialized.string[i]); + if (serialized.string[i] == f_serialized_simple_splitter || i == serialized.used) { - if (locations->used + 1 >= locations->size) { + if (locations->used + width >= locations->size) { f_resize_string_locations(status, (*locations), locations->size + f_serialized_default_allocation_step); if (f_error_is_error(status)) return status; } if (start == i) { + // provide an invalid start to stop range to communicate that there is no data. locations->array[locations->used].start = 1; locations->array[locations->used].stop = 0; locations->used++; @@ -63,18 +67,25 @@ extern "C" { locations->used++; } - start = i + 1; + if (i + width > serialized.used) { + return f_error_set_error(f_incomplete_utf_on_eos); + } + + start = i + width; + } + else if (i + width > serialized.used) { + return f_error_set_error(f_incomplete_utf_on_eos); } - i++; + i += width; } // while return f_none; } -#endif // _di_fl_unserialize_simple_ +#endif // _di_fl_unserialize_simple_map_ -#ifndef _di_fl_unserialize_simple_get_ - f_return_status fl_unserialize_simple_get(const f_dynamic_string serialized, const f_array_length index, f_string_location *location) { +#ifndef _di_fl_unserialize_simple_find_ + f_return_status fl_unserialize_simple_find(const f_dynamic_string serialized, const f_array_length index, f_string_location *location) { #ifndef _di_level_0_parameter_checking_ if (location == 0) return f_error_set_error(f_invalid_parameter); #endif // _di_level_0_parameter_checking_ @@ -82,33 +93,51 @@ extern "C" { f_status status = f_none; f_array_length i = 0; + f_array_length start = 0; f_array_length current = 0; - location->start = 1; - location->stop = 0; + f_u_short width = 0; while (i < serialized.used) { - if (current == index) { - if (location->start > location->stop) { - location->start = i; - location->stop = i; + width = f_macro_utf_byte_width(serialized.string[i]); + + if 
(serialized.string[i] == f_serialized_simple_splitter) { + if (current == index) { + if (start == i) { + // provide an invalid start to stop range to communicate that there is no data. + location->start = 1; + location->stop = 0; + } + else { + location->start = start; + location->stop = i - 1; + } + + return f_none; } - if (serialized.string[i] == f_serialized_simple_splitter) { + start = i + width; + current++; + } + else if (i == serialized.used) { + if (current == index) { + location->start = start; location->stop = i - 1; - break; } + + return f_none_on_eos; } - else if (serialized.string[i] == f_serialized_simple_splitter) { - current++; + + if (i + width > serialized.used) { + return f_error_set_error(f_incomplete_utf_on_eos); } - i++; + i += width; } // while - return f_none; + return f_no_data_on_eos; } -#endif // _di_fl_unserialize_simple_get_ +#endif // _di_fl_unserialize_simple_find_ #ifdef __cplusplus } // extern "C" diff --git a/level_1/fl_serialized/c/serialized.h b/level_1/fl_serialized/c/serialized.h index db4882d..3294f30 100644 --- a/level_1/fl_serialized/c/serialized.h +++ b/level_1/fl_serialized/c/serialized.h @@ -20,31 +20,92 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { #endif +/** + * Serialize a string using the Simple serialize algorithm. + * + * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin". + * + * To generate the above example, this would be called 3 times, with the following strings: + * 1) value = "/bin", then: PATH="/bin". + * 2) value = "/sbin", then: PATH="/bin:/sbin". + * 3) value = "/usr/bin", then: PATH="/bin:/sbin:/usr/bin". + * + * @param value + * The string to append onto serialized. + * @param serialized + * The dynamic string that represents a serialized set of strings. + * + * @return + * f_none on success. + * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * f_reallocation_error (with error bit) on memory reallocation error. + */ #ifndef _di_fl_serialize_simple_ - /** - * this function will append a string to the serialize. - */ extern f_return_status fl_serialize_simple(const f_dynamic_string value, f_dynamic_string *serialized); #endif // _di_fl_serialize_simple_ -#ifndef _di_fl_unserialize_simple_ - /** - * this function will unserialize a serialized string and store the results in an array of strings. - */ - extern f_return_status fl_unserialize_simple(const f_dynamic_string serialized, f_string_locations *locations); -#endif // _di_fl_unserialize_ - -#ifndef _di_fl_unserialize_simple_get_ - /** - * this function will pull a single serialized value from the serialized string at the given index. - */ - extern f_return_status fl_unserialize_simple_get(const f_dynamic_string serialized, const f_array_length index, f_string_location *location); -#endif // _di_fl_unserialize_simple_get_ +// @todo: implement fl_unserialize_simple() such that a new array of strings is allocated. + +/** + * Identify string positions within a serialized string using the Simple serialize algorithm. + * + * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin". + * + * After processing the above example, there would be the following positions: + * 1) start = 0, stop = 3. + * 2) start = 5, stop = 9. + * 3) start = 11, stop = 18. + * + * @param serialized + * A serialized string to de-serialize. + * @param locations + * The locations within the serialized string representing distinct separate strings. + * + * @return + * f_none on success. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * f_reallocation_error (with error bit) on memory reallocation error.
+ */ +#ifndef _di_fl_unserialize_simple_map_ + extern f_return_status fl_unserialize_simple_map(const f_dynamic_string serialized, f_string_locations *locations); +#endif // _di_fl_unserialize_simple_map_ + +/** + * Unserialize a specific string using the Simple serialize algorithm. + * + * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin". + * + * After processing the above example, there would be the following positions, for the given index: + * 1) with index = 0, start = 0, stop = 3. + * 2) with index = 1, start = 5, stop = 9. + * 3) with index = 2, start = 11, stop = 18. + * + * @param serialized + * A serialized string to de-serialize. + * @param index + * An index position within the serialized string to get the deserialized positions of. + * @param location + * A location within the serialized string representing the string at the given index. + * + * @return + * f_none on success. + * f_none_on_eos on success at end of string. + * f_no_data_on_eos if end of string reached before index was reached. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + */ +#ifndef _di_fl_unserialize_simple_find_ + extern f_return_status fl_unserialize_simple_find(const f_dynamic_string serialized, const f_array_length index, f_string_location *location); +#endif // _di_fl_unserialize_simple_find_ + +// @todo: implement fl_unserialize_simple_get() such that a new string is allocated, if found. 
#ifdef __cplusplus } // extern "C" diff --git a/level_1/fl_serialized/data/build/dependencies b/level_1/fl_serialized/data/build/dependencies index 7a98ab9..a34ebfa 100644 --- a/level_1/fl_serialized/data/build/dependencies +++ b/level_1/fl_serialized/data/build/dependencies @@ -3,3 +3,4 @@ f_errors f_strings f_serialized f_memory +f_utf diff --git a/level_1/fl_serialized/data/build/settings b/level_1/fl_serialized/data/build/settings index 6bab136..ddc683d 100644 --- a/level_1/fl_serialized/data/build/settings +++ b/level_1/fl_serialized/data/build/settings @@ -10,9 +10,9 @@ version_micro 0 build_compiler gcc build_linker ar build_libraries -lc -build_libraries_fll -lf_memory +build_libraries_fll -lf_memory -lf_utf build_sources_library serialized.c -build_sources_program +build_sources_program build_sources_headers serialized.h build_sources_bash build_sources_settings diff --git a/level_1/fl_strings/c/strings.c b/level_1/fl_strings/c/strings.c index a7d8387..9346025 100644 --- a/level_1/fl_strings/c/strings.c +++ b/level_1/fl_strings/c/strings.c @@ -4,32 +4,34 @@ extern "C" { #endif -// TODO: this file needs to be rewriten with UTF-8 support. 
- #ifndef _di_fl_rip_string_ - f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location position, f_dynamic_string *results) { + f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location location, f_dynamic_string *result) { #ifndef _di_level_1_parameter_checking_ - if (results == 0) return f_error_set_error(f_invalid_parameter); - if (position.start < 0) return f_error_set_error(f_invalid_parameter); - if (position.stop < position.start) return f_error_set_error(f_invalid_parameter); + if (location.start < 0) return f_error_set_error(f_invalid_parameter); + if (location.stop < location.start) return f_error_set_error(f_invalid_parameter); if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); - if (position.start >= buffer.used) return f_error_set_error(f_invalid_parameter); + if (location.start >= buffer.used) return f_error_set_error(f_invalid_parameter); #endif // _di_level_1_parameter_checking_ // the start and stop point are inclusive locations, and therefore start - stop is actually 1 too few locations - f_string_length size = position.stop - position.start + 1; + f_string_length size = (location.stop - location.start) + 1; if (size > 0) { f_status status = f_none; - f_resize_dynamic_string(status, (*results), size); + if (result == 0) { + f_new_dynamic_string(status, (*result), size); + } + else { + f_resize_dynamic_string(status, (*result), size); + } if (f_error_is_error(status)) { return status; } - memcpy(results->string, buffer.string + position.start, sizeof(char) * size); - results->used = size; + memcpy(result->string, buffer.string + location.start, sizeof(char) * size); + result->used = size; return f_none; } @@ -38,73 +40,268 @@ extern "C" { } #endif // _di_fl_rip_string_ -#ifndef _di_fl_seek_line_past_non_graph_ - f_return_status fl_seek_line_past_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder) { +#ifndef 
_di_fl_seek_line_until_graph_ + f_return_status fl_seek_line_until_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder) { #ifndef _di_level_1_parameter_checking_ - if (position == 0) return f_error_set_error(f_invalid_parameter); - if (position->start < 0) return f_error_set_error(f_invalid_parameter); - if (position->stop < position->start) return f_error_set_error(f_invalid_parameter); + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); - if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter); #endif // _di_level_1_parameter_checking_ - while (!isgraph(buffer.string[position->start]) || buffer.string[position->start] == placeholder) { - if (buffer.string[position->start] == f_eol) return f_none; + f_status status = f_none; + f_u_short width = 0; + + f_string_length max_width = (location->stop - location->start) + 1; + + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } + + while (buffer.string[location->start] == placeholder || (!isgraph(buffer.string[location->start]) && (status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false)) { + if (buffer.string[location->start] == f_eol) return f_none_on_eol; + + width = f_macro_utf_byte_width(buffer.string[location->start]); + + if (width > 1) { + if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos); + if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop); + } + + location->start += width; + + if (location->start >= buffer.used) return f_none_on_eos; + if (location->start > 
location->stop) return f_none_on_stop; - ++position->start; + max_width = (location->stop - location->start) + 1; - if (position->start >= buffer.used) return f_none_on_eos; - if (position->start > position->stop) return f_none_on_stop; + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } } // while + if (f_error_is_error(status)) { + return status; + } + return f_none; } -#endif // _di_fl_seek_line_past_non_graph_ +#endif // _di_fl_seek_line_until_graph_ #ifndef _di_fl_seek_line_until_non_graph_ - f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder) { + f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder) { #ifndef _di_level_1_parameter_checking_ - if (position->start < 0) return f_error_set_error(f_invalid_parameter); - if (position->stop < position->start) return f_error_set_error(f_invalid_parameter); + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); - if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter); #endif // _di_level_1_parameter_checking_ - while (isgraph(buffer.string[position->start]) || buffer.string[position->start] == placeholder) { - if (buffer.string[position->start] == f_eol) return f_none; + f_status status = f_none; + f_u_short width = 0; + + f_string_length max_width = (location->stop - location->start) + 1; + + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } + + while (buffer.string[location->start] == placeholder || 
(isgraph(buffer.string[location->start]) && (status = f_utf_is_space(buffer.string + location->start, max_width)) == f_false)) { + if (buffer.string[location->start] == f_eol) return f_none_on_eol; + + width = f_macro_utf_byte_width(buffer.string[location->start]); + + if (width > 1) { + if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos); + if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop); + } + + location->start += width; - ++position->start; + if (location->start >= buffer.used) return f_none_on_eos; + if (location->start > location->stop) return f_none_on_stop; - if (position->start >= buffer.used) return f_none_on_eos; - if (position->start > position->stop) return f_none_on_stop; + max_width = (location->stop - location->start) + 1; + + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } } // while + if (f_error_is_error(status)) { + return status; + } + return f_none; } #endif // _di_fl_seek_line_until_non_graph_ -#ifndef _di_fl_seek_to_ - f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *position, const char seek_to_this) { +#ifndef _di_fl_seek_line_to_ + f_return_status fl_seek_line_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this) { #ifndef _di_level_1_parameter_checking_ - if (position->start < 0) return f_error_set_error(f_invalid_parameter); - if (position->stop < position->start) return f_error_set_error(f_invalid_parameter); + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); - if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return 
f_error_set_error(f_invalid_parameter); #endif // _di_level_1_parameter_checking_ - while (buffer.string[position->start] != seek_to_this) { - if (buffer.string[position->start] == f_eol) return f_none; + while (buffer.string[location->start] != seek_to_this) { + if (buffer.string[location->start] == f_eol) return f_none_on_eol; - ++position->start; + location->start++; - if (position->start >= buffer.used) return f_none_on_eos; - if (position->start > position->stop) return f_none_on_stop; + if (location->start >= buffer.used) return f_none_on_eos; + if (location->start > location->stop) return f_none_on_stop; + } // while + + return f_none; + } +#endif // _di_fl_seek_line_to_ + +#ifndef _di_fl_seek_line_to_character_ + f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) { + #ifndef _di_level_1_parameter_checking_ + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); + if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + #endif // _di_level_1_parameter_checking_ + + const f_u_short seek_width = f_macro_utf_character_width(seek_to_this); + + f_status status = f_none; + + f_u_short width = 0; + + f_string_length max_width = 0; + + while (location->start < buffer.used) { + max_width = (location->stop - location->start) + 1; + + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } + + width = f_macro_utf_byte_width(buffer.string[location->start]); + + if (width == 1) { + if (buffer.string[location->start] == f_eol) return f_none_on_eol; + + if (seek_width == width) { + if (buffer.string[location->start] == seek_to_this) return f_none; + } + } + else { + if 
(location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos); + if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop); + + if (width == seek_width) { + f_utf_character character = 0; + status = f_utf_char_to_character(buffer.string + location->start, max_width, &character); + + if (f_error_is_error(status)) { + return status; + } + + if (character == seek_to_this) { + return f_none; + } + } + } + + location->start += width; + + if (location->start >= location->stop) return f_none_on_stop; + } // while + + return f_none_on_eos; + } +#endif // _di_fl_seek_line_to_character_ + +#ifndef _di_fl_seek_to_ + f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this) { + #ifndef _di_level_1_parameter_checking_ + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); + if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + #endif // _di_level_1_parameter_checking_ + + while (buffer.string[location->start] != seek_to_this) { + location->start++; + + if (location->start >= buffer.used) return f_none_on_eos; + if (location->start > location->stop) return f_none_on_stop; } // while return f_none; } #endif // _di_fl_seek_to_ +#ifndef _di_fl_seek_to_character_ + f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) { + #ifndef _di_level_1_parameter_checking_ + if (location == 0) return f_error_set_error(f_invalid_parameter); + if (location->start < 0) return f_error_set_error(f_invalid_parameter); + if (location->stop < location->start) return f_error_set_error(f_invalid_parameter); + if (buffer.used <= 0) 
return f_error_set_error(f_invalid_parameter); + if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter); + #endif // _di_level_1_parameter_checking_ + + const f_u_short seek_width = f_macro_utf_character_width(seek_to_this); + + f_status status = f_none; + + f_u_short width = 0; + + f_string_length max_width = 0; + + while (location->start < buffer.used) { + max_width = (location->stop - location->start) + 1; + + if (max_width > buffer.used - location->start) { + max_width = buffer.used - location->start; + } + + width = f_macro_utf_byte_width(buffer.string[location->start]); + + if (width == 1) { + if (seek_width == width) { + if (buffer.string[location->start] == seek_to_this) return f_none; + } + } + else { + if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos); + if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop); + + if (width == seek_width) { + f_utf_character character = 0; + status = f_utf_char_to_character(buffer.string + location->start, max_width, &character); + + if (f_error_is_error(status)) { + return status; + } + + if (character == seek_to_this) { + return f_none; + } + } + } + + location->start += width; + + if (location->start >= location->stop) return f_none_on_stop; + } // while + + return f_none_on_eos; + } +#endif // _di_fl_seek_to_character_ + #ifndef _di_fl_compare_strings_ f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2) { #ifndef _di_level_1_parameter_checking_ @@ -115,22 +312,25 @@ extern "C" { f_string_length i1 = 0; f_string_length i2 = 0; - f_string_length stop1 = length1; - f_string_length stop2 = length2; + for (; i1 < length1 && i2 < length2; i1++, i2++) { + // skip past newlines in string1. 
+ while (i1 < length1 && string1[i1] == f_eos) i1++; + if (i1 == length1) break; - for (; i1 < stop1 && i2 < stop2; i1++, i2++) { - while (i1 < stop1 && string1[i1] == f_eos) i1++; - while (i2 < stop2 && string2[i2] == f_eos) i2++; + // skip past newlines in string2. + while (i2 < length2 && string2[i2] == f_eos) i2++; + if (i2 == length2) break; if (string1[i1] != string2[i2]) return f_not_equal_to; } // for - while (i1 < stop1) { + // only return f_equal_to if all remaining characters are NULL. + while (i1 < length1) { if (string1[i1] != f_eos) return f_not_equal_to; i1++; } // while - while (i2 < stop2) { + while (i2 < length2) { if (string2[i2] != f_eos) return f_not_equal_to; i2++; } // while @@ -149,22 +349,25 @@ extern "C" { f_string_length i1 = 0; f_string_length i2 = 0; - f_string_length stop1 = string1.used; - f_string_length stop2 = string2.used; + for (; i1 < string1.used && i2 < string2.used; i1++, i2++) { + // skip past newlines in string1. + while (i1 < string1.used && string1.string[i1] == f_eos) i1++; + if (i1 == string1.used) break; - for (; i1 < stop1 && i2 < stop2; i1++, i2++) { - while (i1 < stop1 && string1.string[i1] == f_eos) i1++; - while (i2 < stop2 && string2.string[i2] == f_eos) i2++; + // skip past newlines in string2. + while (i2 < string2.used && string2.string[i2] == f_eos) i2++; + if (i2 == string2.used) break; if (string1.string[i1] != string2.string[i2]) return f_not_equal_to; } // for - while (i1 < stop1) { + // only return f_equal_to if all remaining characters are NULL. 
+ while (i1 < string1.used) { if (string1.string[i1] != f_eos) return f_not_equal_to; i1++; } // while - while (i2 < stop2) { + while (i2 < string2.used) { if (string2.string[i2] != f_eos) return f_not_equal_to; i2++; } // while @@ -174,7 +377,7 @@ extern "C" { #endif // _di_fl_compare_dynamic_strings_ #ifndef _di_fl_compare_partial_dynamic_strings_ - f_return_status fl_compare_partial_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2) { + f_return_status fl_compare_dynamic_strings_partial(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2) { #ifndef _di_level_1_parameter_checking_ if (string1.used <= 0) return f_error_set_error(f_invalid_parameter); if (string2.used <= 0) return f_error_set_error(f_invalid_parameter); @@ -189,16 +392,22 @@ extern "C" { f_string_length i1 = offset1.start; f_string_length i2 = offset2.start; - f_string_length stop1 = offset1.stop + 1; - f_string_length stop2 = offset2.stop + 1; + const f_string_length stop1 = offset1.stop + 1; + const f_string_length stop2 = offset2.stop + 1; for (; i1 < stop1 && i2 < stop2; i1++, i2++) { + // skip past newlines in string1. while (i1 < stop1 && string1.string[i1] == f_eos) i1++; + if (i1 == stop1) break; + + // skip past newlines in string2. while (i2 < stop2 && string2.string[i2] == f_eos) i2++; + if (i2 == stop2) break; if (string1.string[i1] != string2.string[i2]) return f_not_equal_to; } // for + // only return f_equal_to if all remaining characters are NULL. 
while (i1 < stop1) { if (string1.string[i1] != f_eos) return f_not_equal_to; i1++; diff --git a/level_1/fl_strings/c/strings.h b/level_1/fl_strings/c/strings.h index a8027da..5b4c197 100644 --- a/level_1/fl_strings/c/strings.h +++ b/level_1/fl_strings/c/strings.h @@ -25,61 +25,254 @@ extern "C" { #endif +/** + * Allocate a new string from the provided range in the buffer. + * + * @param buffer + * The buffer to rip from. + * @param location + * A range within the buffer representing the string to rip. + * @param result + * The new string, which will be allocated or reallocated as necessary. + * + * @return + * f_none on success. + * f_no_data if nothing to rip, no allocations or reallocations are performed. + * f_incomplete_utf_on_eos if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * f_allocation_error (with error bit) on memory allocation error. + * f_reallocation_error (with error bit) on memory reallocation error. + */ #ifndef _di_fl_rip_string_ - /** - * given a start and stop position, this will return a new string based from the supplied buffer, based on the passed positions. - * this will replace/overwrite existing information inside of the results variable. - */ - extern f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location position, f_dynamic_string *results); + extern f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location location, f_dynamic_string *result); #endif // _di_fl_rip_string_ -#ifndef _di_fl_seek_line_past_non_graph_ - /** - * given a dynamic string and a string location, seek past all non-graph characters until a graph is reached. - * will ignore the given placeholder.
- */ - extern f_return_status fl_seek_line_past_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder); -#endif // _di_fl_seek_line_past_non_graph_ +/** + * Increment buffer location until a graph character (including UTF-8) or an EOL is matched. + * + * This will ignore the UTF-8 BOM. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * @param placeholder + * A single-width character representing a placeholder to ignore (may be NULL). + * + * @return + * f_none on success. + * f_none_on_eol on success, but stopped at EOL. + * f_none_on_eos on success, but stopped at end of buffer. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * f_allocation_error (with error bit) on memory allocation error. + * f_reallocation_error (with error bit) on memory reallocation error. + */ +#ifndef _di_fl_seek_line_until_graph_ + extern f_return_status fl_seek_line_until_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder); +#endif // _di_fl_seek_line_until_graph_ +/** + * Increment buffer location until a non-graph character (including UTF-8) or an EOL is matched. + * + * This will ignore the UTF-8 BOM. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * @param placeholder + * A single-width character representing a placeholder to ignore (may be NULL). + * + * @return + * f_none on success. + * f_none_on_eol on success, but stopped at EOL. + * f_none_on_eos on success, but stopped at end of buffer. 
+ * f_none_on_stop on success, but stopped at the stop location. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * f_allocation_error (with error bit) on memory allocation error. + * f_reallocation_error (with error bit) on memory reallocation error. + */ #ifndef _di_fl_seek_line_until_non_graph_ - /** - * given a dynamic string and a string location, seek past all graph characters until a non-graph is reached. - * will ignore the given placeholder. - */ - extern f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder); + extern f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder); #endif // _di_fl_seek_line_until_non_graph_ +/** + * Seek the buffer location forward until the character (1-byte wide) or EOL is reached. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * The start location will be incremented by seek. + * @param seek_to_this + * A single-width character representing a character to seek to. + * + * @return + * f_none on success. + * f_none_on_eol on success, but stopped at EOL. + * f_none_on_eos on success, but stopped at end of buffer. + * f_none_on_stop on success, but stopped at the stop location. + * f_invalid_parameter (with error bit) if a parameter is invalid.
+ * + * @see: fl_seek_line_to_character() + */ +#ifndef _di_fl_seek_line_to_ + extern f_return_status fl_seek_line_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this); +#endif // _di_fl_seek_line_to_ + +/** + * Seek the buffer location forward until the character (up to 4-byte wide) or EOL is reached. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * The start location will be incremented by seek. + * @param seek_to_this + * A 1-width, 2-width, 3-width, or 4-width character representing a character to seek to. + * + * @return + * f_none on success. + * f_none_on_eol on success, but stopped at EOL. + * f_none_on_eos on success, but stopped at end of buffer. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_seek_line_to() + */ +#ifndef _di_fl_seek_line_to_character_ + extern f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this); +#endif // _di_fl_seek_line_to_character_ + +/** + * Seek the buffer location forward until the character (1-byte wide) is reached. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * The start location will be incremented by seek. + * @param seek_to_this + * A single-width character representing a character to seek to. + * + * @return + * f_none on success. + * f_none_on_eos on success, but stopped at end of buffer. + * f_none_on_stop on success, but stopped at the stop location.
+ * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_seek_to_character() + */ #ifndef _di_fl_seek_to_ - /** - * given a dynamic string and a string location, seek past all characters until the given character is reached. - */ - extern f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *position, const char seek_to_this); + extern f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this); #endif // _di_fl_seek_to_ +/** + * Seek the buffer location forward until the character (up to 4-byte wide) is reached. + * + * @param buffer + * The buffer to traverse. + * @param location + * A range within the buffer representing the start and stop locations. + * The start location will be incremented by seek. + * @param seek_to_this + * A 1-width, 2-width, 3-width, or 4-width character representing a character to seek to. + * + * @return + * f_none on success. + * f_none_on_eos on success, but stopped at end of buffer. + * f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed. + * f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_seek_to() + */ +#ifndef _di_fl_seek_to_character_ + extern f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this); +#endif // _di_fl_seek_to_character_ + +/** + * Compare two strings, similar to strncmp(). + * + * This does not stop on NULL. + * NULL characters are ignored. 
+ * + * @param string1 + * String to compare. + * @param string2 + * String to compare. + * @param length1 + * Length of string1. + * @param length2 + * Length of string2. + * + * @return + * f_equal_to when both strings equal. + * f_not_equal_to when both strings do not equal. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_compare_dynamic_strings() + * @see: fl_compare_dynamic_strings_partial() + */ #ifndef _di_fl_compare_strings_ - /** - * this compares two strings and works similar to that of strncmp(..) but has significant differences to strncmp(..). - * given two strings, this will return either f_equal_to or f_not_equal_to. - * this does not stop on f_eos and f_eos will be ignored as if it were not taking up any space, therefor a 5 character string could return f_equal_to if the 5 character string contains an f_eos anywhere within it. - */ extern f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2); #endif // _di_fl_compare_strings_ +/** + * Compare two strings, similar to strncmp(). + * + * This does not stop on NULL. + * NULL characters are ignored. + * + * @param string1 + * String to compare. + * @param string2 + * String to compare. + * + * @return + * f_equal_to when both strings equal. + * f_not_equal_to when both strings do not equal. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_compare_strings() + * @see: fl_compare_dynamic_strings_partial() + */ #ifndef _di_fl_compare_dynamic_strings_ - /** - * this compares two dynamic strings and works similar to that of strncmp(..) but has significant differences to strncmp(..). - * given two strings, this will return either f_equal_to or f_not_equal_to. - * this is far safer than fl_compare_strings(..) as dynamic string contain size information within them. 
- * this does not stop on f_eos and f_eos will be ignored as if it were not taking up any space, therefor a 5 character string could return f_equal_to if the 5 character string contains an f_eos anywhere within it. - */ extern f_return_status fl_compare_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2); #endif // _di_fl_compare_dynamic_strings_ +/** + * Compare two strings, similar to strncmp(), but restricted to the given ranges. + * + * This does not stop on NULL. + * NULL characters are ignored. + * + * @param string1 + * String to compare. + * @param string2 + * String to compare. + * @param offset1 + * A range within the string1 to restrict the comparison to. + * @param offset2 + * A range within the string2 to restrict the comparison to. + * + * @return + * f_equal_to when both strings equal. + * f_not_equal_to when both strings do not equal. + * f_invalid_parameter (with error bit) if a parameter is invalid. + * + * @see: fl_compare_strings() + * @see: fl_compare_dynamic_strings() + */ #ifndef _di_fl_compare_partial_dynamic_strings_ - /** - * this functions identical to fl_compare_dynamic_strings, but uses offsets for both strings. 
- */ - extern f_return_status fl_compare_partial_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2); + extern f_return_status fl_compare_dynamic_strings_partial(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2); #endif // _di_fl_compare_partial_dynamic_strings_ #ifdef __cplusplus diff --git a/level_3/firewall/c/private-firewall.c b/level_3/firewall/c/private-firewall.c index b104a1b..34aaf01 100644 --- a/level_3/firewall/c/private-firewall.c +++ b/level_3/firewall/c/private-firewall.c @@ -919,7 +919,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved location.stop = firewall_group_main_length - 1; fixed_string.string = firewall_group_main; fixed_string.used = firewall_group_main_length; - if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { + if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { new_chain = f_false; reserved->has_main = f_true; reserved->main_at = i; @@ -930,7 +930,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved location.stop = firewall_group_stop_length - 1; fixed_string.string = firewall_group_stop; fixed_string.used = firewall_group_stop_length; - if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { + if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { new_chain = f_false; reserved->has_stop = f_true; reserved->stop_at = i; @@ -941,7 +941,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved location.stop = firewall_group_lock_length - 1; fixed_string.string = firewall_group_lock; 
fixed_string.used = firewall_group_lock_length; - if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { + if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { new_chain = f_false; reserved->has_lock = f_true; reserved->lock_at = i; @@ -952,7 +952,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved location.stop = firewall_group_lock_length - 1; fixed_string.string = firewall_chain_none; fixed_string.used = firewall_chain_none_length; - if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { + if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) { new_chain = f_false; } @@ -963,7 +963,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved location.start = 0; location.stop = data->chains.array[j].used - 1; - if (fl_compare_partial_dynamic_strings(local->buffer, data->chains.array[j], local->chain_objects.array[i], location) == f_equal_to) { + if (fl_compare_dynamic_strings_partial(local->buffer, data->chains.array[j], local->chain_objects.array[i], location) == f_equal_to) { new_chain = f_false; local->chain_ids.array[i] = j; diff --git a/level_3/firewall/data/build/dependencies b/level_3/firewall/data/build/dependencies index 55693f4..4b4f94d 100644 --- a/level_3/firewall/data/build/dependencies +++ b/level_3/firewall/data/build/dependencies @@ -6,6 +6,7 @@ f_pipe f_print f_strings f_types +f_utf fl_colors fl_console fl_directory diff --git a/level_3/init/c/init.c b/level_3/init/c/init.c index 9604025..5b7d7d5 100644 --- a/level_3/init/c/init.c +++ b/level_3/init/c/init.c @@ -217,7 +217,7 @@ extern "C" { // sit and wait for signals. 
- while (1) { + for (;;) { signal_result = sigwaitinfo(&signal_mask, &signal_information); if (signal_result < 0) { @@ -266,7 +266,7 @@ extern "C" { memset(&signal_information, 0, sizeof(siginfo_t)); continue; - } + } // for } init_delete_argument((*argument)); diff --git a/level_3/init/data/build/dependencies b/level_3/init/data/build/dependencies index 55693f4..4b4f94d 100644 --- a/level_3/init/data/build/dependencies +++ b/level_3/init/data/build/dependencies @@ -6,6 +6,7 @@ f_pipe f_print f_strings f_types +f_utf fl_colors fl_console fl_directory