Update: add more support for UTF-8 and improve affected functions

author Kevin Day <thekevinday@gmail.com>

Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)

committer Kevin Day <thekevinday@gmail.com>

Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)
author Kevin Day <thekevinday@gmail.com>
Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)
committer Kevin Day <thekevinday@gmail.com>
Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)
diff --git a/level_0/f_serialized/c/serialized.h b/level_0/f_serialized/c/serialized.h

index 88723792ae9c5ba0ee8e7589e4434392840bccc2..fa97c99989f87a858b38fc67616558a16e8168ea 100644 (file)
--- a/level_0/f_serialized/c/serialized.h
+++ b/level_0/f_serialized/c/serialized.h
@@ -7,7 +7,7 @@
   *
   * Provides string processing functionality for what is to be defined as a serialized string.
   * Serialized strings are strings that can hold multiple values in a single variable.
- * An example of serialized content is the PATH environment variable where ":" separates data..
+ * An example of serialized content is the PATH environment variable where ":" separates data.
   */
  #ifndef _F_serialized_h
  #define _F_serialized_h
@@ -34,7 +34,8 @@ extern "C" {
  #endif // _di_f_serialized_splitters_
  
  #ifndef _di_f_serialized_default_allocation_step_
-  #define f_serialized_default_allocation_step f_memory_default_allocation_step
+  // provide a UTF-8 friendly allocation step.
+  #define f_serialized_default_allocation_step 4
  #endif // _di_f_serialized_default_allocation_step_
  
  #ifdef __cplusplus
diff --git a/level_0/f_utf/c/utf.c b/level_0/f_utf/c/utf.c

index 3a947dc92f851a42a7043471233847f1ab1bb331..10c61fadb32092904e7a4b38663abe50f9473622 100644 (file)
--- a/level_0/f_utf/c/utf.c
+++ b/level_0/f_utf/c/utf.c
@@ -4,8 +4,8 @@
  extern "C" {
  #endif
  
-#ifndef _di_f_utf_is_bom_string_
-  f_return_status f_utf_is_bom_string(const f_string character, const f_u_short max_width) {
+#ifndef _di_f_utf_is_bom_
+  f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -28,10 +28,10 @@ extern "C" {
  
      return f_false;
    }
-#endif // _di_f_utf_is_bom_string_
+#endif // _di_f_utf_is_bom_
  
-#ifndef _di_f_utf_is_graph_string_
-  f_return_status f_utf_is_graph_string(const f_string character, const f_u_short max_width) {
+#ifndef _di_f_utf_is_graph_
+  f_return_status f_utf_is_graph(const f_string character, const f_u_short max_width) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -48,20 +48,20 @@ extern "C" {
  
      // for now, just assume that any non-whitespace, non-substitute utf-8 character is a graph.
  
-    if (f_utf_is_space_string(character, max_width) == f_true) {
+    if (f_utf_is_space(character, max_width) == f_true) {
        return f_false;
      }
  
-    if (f_utf_is_bom_string(character, max_width) == f_true) {
+    if (f_utf_is_bom(character, max_width) == f_true) {
        return f_false;
      }
  
      return f_true;
    }
-#endif // _di_f_utf_is_graph_string_
+#endif // _di_f_utf_is_graph_
  
-#ifndef _di_f_utf_is_space_string_
-  f_return_status f_utf_is_space_string(const f_string character, const f_u_short max_width) {
+#ifndef _di_f_utf_is_space_
+  f_return_status f_utf_is_space(const f_string character, const f_u_short max_width) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -190,10 +190,10 @@ extern "C" {
  
      return f_false;
    }
-#endif // _di_f_utf_is_space_string_
+#endif // _di_f_utf_is_space_
  
-#ifndef _di_f_utf_is_substitute_string_
-  f_return_status f_utf_is_substitute_string(const f_string character, const f_u_short max_width) {
+#ifndef _di_f_utf_is_substitute_
+  f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -238,10 +238,10 @@ extern "C" {
  
      return f_false;
    }
-#endif // _di_f_utf_is_substitute_string_
+#endif // _di_f_utf_is_substitute_
  
-#ifndef _di_f_utf_is_whitespace_string_
-  f_return_status f_utf_is_whitespace_string(const f_string character, const f_u_short max_width) {
+#ifndef _di_f_utf_is_whitespace_
+  f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -350,20 +350,12 @@ extern "C" {
  
      return f_false;
    }
-#endif // _di_f_utf_is_whitespace_string_
+#endif // _di_f_utf_is_whitespace_
  
  #ifndef _di_f_utf_is_bom_character_
    f_return_status f_utf_is_bom_character(const f_utf_character character) {
-    f_u_short width = f_macro_utf_byte_width(character.byte_1);
-
-    if (width == 1) {
-      return f_false;
-    }
-
-    if (width == 3) {
-      if (character.byte_1 == f_utf_bom[0] && character.byte_2 == f_utf_bom[1] && character.byte_3 == f_utf_bom[2]) {
-        return f_true;
-      }
+    if (character == f_macro_utf_character_mask_bom) {
+      return f_true;
      }
  
      return f_false;
@@ -372,7 +364,7 @@ extern "C" {
  
  #ifndef _di_f_utf_is_graph_character_
    f_return_status f_utf_is_graph_character(const f_utf_character character) {
-    f_u_short width = f_macro_utf_byte_width(character.byte_1);
+    f_u_short width = f_macro_utf_character_width_is(character);
  
      if (width == 0) {
        return f_false;
@@ -394,26 +386,28 @@ extern "C" {
  
  #ifndef _di_f_utf_is_space_character_
    f_return_status f_utf_is_space_character(const f_utf_character character) {
-    f_u_short width = f_macro_utf_byte_width(character.byte_1);
+    f_u_short width = f_macro_utf_character_width(character);
  
      if (width == 1) {
        return f_false;
      }
  
      if (width == 2) {
-      if (character.byte_1 == f_utf_space_no_break[0] && character.byte_2 == f_utf_space_no_break[1]) {
+      char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+
+      if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_line_feed_reverse[0] && character.byte_2 == f_utf_space_line_feed_reverse[1]) {
+      if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_line_next[0] && character.byte_2 == f_utf_space_line_next[1]) {
+      if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_middle_dot[0] && character.byte_2 == f_utf_substitute_middle_dot[1]) {
+      if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == f_utf_substitute_middle_dot[1]) {
          return f_true;
        }
  
@@ -421,91 +415,93 @@ extern "C" {
      }
  
      if (width == 3) {
-      if (character.byte_1 == f_utf_space_no_break_narrow[0] && character.byte_2 == f_utf_space_no_break_narrow[1] && character.byte_3 == f_utf_space_no_break_narrow[2]) {
+      char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+
+      if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en[0] && character.byte_2 == f_utf_space_en[1] && character.byte_3 == f_utf_space_en[2]) {
+      if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) {
+      if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) {
+      if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em[0] && character.byte_2 == f_utf_space_em[1] && character.byte_3 == f_utf_space_em[2]) {
+      if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_quad[0] && character.byte_2 == f_utf_space_em_quad[1] && character.byte_3 == f_utf_space_em_quad[2]) {
+      if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] == f_utf_space_em_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_three[0] && character.byte_2 == f_utf_space_em_per_three[1] && character.byte_3 == f_utf_space_em_per_three[2]) {
+      if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_four[0] && character.byte_2 == f_utf_space_em_per_four[1] && character.byte_3 == f_utf_space_em_per_four[2]) {
+      if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_six[0] && character.byte_2 == f_utf_space_em_per_six[1] && character.byte_3 == f_utf_space_em_per_six[2]) {
+      if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_figure[0] && character.byte_2 == f_utf_space_figure[1] && character.byte_3 == f_utf_space_figure[2]) {
+      if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_punctuation[0] && character.byte_2 == f_utf_space_punctuation[1] && character.byte_3 == f_utf_space_punctuation[2]) {
+      if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_thin[0] && character.byte_2 == f_utf_space_thin[1] && character.byte_3 == f_utf_space_thin[2]) {
+      if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_hair[0] && character.byte_2 == f_utf_space_hair[1] && character.byte_3 == f_utf_space_hair[2]) {
+      if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == f_utf_space_hair[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_separator_line[0] && character.byte_2 == f_utf_space_separator_line[1] && character.byte_3 == f_utf_space_separator_line[2]) {
+      if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_separator_paragraph[0] && character.byte_2 == f_utf_space_separator_paragraph[1] && character.byte_3 == f_utf_space_separator_paragraph[2]) {
+      if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_ogham[0] && character.byte_2 == f_utf_space_ogham[1] && character.byte_3 == f_utf_space_ogham[2]) {
+      if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_ideographic[0] && character.byte_2 == f_utf_space_ideographic[1] && character.byte_3 == f_utf_space_ideographic[2]) {
+      if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_medium_mathematical[0] && character.byte_2 == f_utf_space_medium_mathematical[1] && character.byte_3 == f_utf_space_medium_mathematical[2]) {
+      if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_symbol_blank[0] && character.byte_2 == f_utf_substitute_symbol_blank[1] && character.byte_3 == f_utf_substitute_symbol_blank[2]) {
+      if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_symbol_space[0] && character.byte_2 == f_utf_substitute_symbol_space[1] && character.byte_3 == f_utf_substitute_symbol_space[2]) {
+      if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_open_box[0] && character.byte_2 == f_utf_substitute_open_box[1] && character.byte_3 == f_utf_substitute_open_box[2]) {
+      if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_open_box_shouldered[0] && character.byte_2 == f_utf_substitute_open_box_shouldered[1] && character.byte_3 == f_utf_substitute_open_box_shouldered[2]) {
+      if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) {
          return f_true;
        }
  
@@ -518,14 +514,16 @@ extern "C" {
  
  #ifndef _di_f_utf_is_substitute_character_
    f_return_status f_utf_is_substitute_character(const f_utf_character character) {
-    f_u_short width = f_macro_utf_byte_width(character.byte_1);
+    f_u_short width = f_macro_utf_character_width(character);
  
      if (width == 1) {
        return f_false;
      }
  
      if (width == 2) {
-      if (character.byte_1 == f_utf_substitute_middle_dot[0] && character.byte_2 == f_utf_substitute_middle_dot[1]) {
+      char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+
+      if (utf[0] == f_utf_substitute_middle_dot[0] && utf[1] == f_utf_substitute_middle_dot[1]) {
          return f_true;
        }
  
@@ -533,19 +531,21 @@ extern "C" {
      }
  
      if (width == 3) {
-      if (character.byte_1 == f_utf_substitute_symbol_blank[0] && character.byte_2 == f_utf_substitute_symbol_blank[1] && character.byte_3 == f_utf_substitute_symbol_blank[2]) {
+      char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+
+      if (utf[0] == f_utf_substitute_symbol_blank[0] && utf[1] == f_utf_substitute_symbol_blank[1] && utf[2] == f_utf_substitute_symbol_blank[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_symbol_space[0] && character.byte_2 == f_utf_substitute_symbol_space[1] && character.byte_3 == f_utf_substitute_symbol_space[2]) {
+      if (utf[0] == f_utf_substitute_symbol_space[0] && utf[1] == f_utf_substitute_symbol_space[1] && utf[2] == f_utf_substitute_symbol_space[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_open_box[0] && character.byte_2 == f_utf_substitute_open_box[1] && character.byte_3 == f_utf_substitute_open_box[2]) {
+      if (utf[0] == f_utf_substitute_open_box[0] && utf[1] == f_utf_substitute_open_box[1] && utf[2] == f_utf_substitute_open_box[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_substitute_open_box_shouldered[0] && character.byte_2 == f_utf_substitute_open_box_shouldered[1] && character.byte_3 == f_utf_substitute_open_box_shouldered[2]) {
+      if (utf[0] == f_utf_substitute_open_box_shouldered[0] && utf[1] == f_utf_substitute_open_box_shouldered[1] && utf[2] == f_utf_substitute_open_box_shouldered[2]) {
          return f_true;
        }
  
@@ -558,22 +558,24 @@ extern "C" {
  
  #ifndef _di_f_utf_is_whitespace_character_
    f_return_status f_utf_is_whitespace_character(const f_utf_character character) {
-    f_u_short width = f_macro_utf_byte_width(character.byte_1);
+    f_u_short width = f_macro_utf_character_width(character);
  
      if (width == 1) {
        return f_false;
      }
  
      if (width == 2) {
-      if (character.byte_1 == f_utf_space_no_break[0] && character.byte_2 == f_utf_space_no_break[1]) {
+      char utf[2] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character) };
+
+      if (utf[0] == f_utf_space_no_break[0] && utf[1] == f_utf_space_no_break[1]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_line_feed_reverse[0] && character.byte_2 == f_utf_space_line_feed_reverse[1]) {
+      if (utf[0] == f_utf_space_line_feed_reverse[0] && utf[1] == f_utf_space_line_feed_reverse[1]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_line_next[0] && character.byte_2 == f_utf_space_line_next[1]) {
+      if (utf[0] == f_utf_space_line_next[0] && utf[1] == f_utf_space_line_next[1]) {
          return f_true;
        }
  
@@ -581,75 +583,77 @@ extern "C" {
      }
  
      if (width == 3) {
-      if (character.byte_1 == f_utf_space_no_break_narrow[0] && character.byte_2 == f_utf_space_no_break_narrow[1] && character.byte_3 == f_utf_space_no_break_narrow[2]) {
+      char utf[3] = { f_macro_utf_character_to_char_1(character), f_macro_utf_character_to_char_2(character), f_macro_utf_character_to_char_3(character) };
+
+      if (utf[0] == f_utf_space_no_break_narrow[0] && utf[1] == f_utf_space_no_break_narrow[1] && utf[2] == f_utf_space_no_break_narrow[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en[0] && character.byte_2 == f_utf_space_en[1] && character.byte_3 == f_utf_space_en[2]) {
+      if (utf[0] == f_utf_space_en[0] && utf[1] == f_utf_space_en[1] && utf[2] == f_utf_space_en[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) {
+      if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_en_quad[0] && character.byte_2 == f_utf_space_en_quad[1] && character.byte_3 == f_utf_space_en_quad[2]) {
+      if (utf[0] == f_utf_space_en_quad[0] && utf[1] == f_utf_space_en_quad[1] && utf[2] == f_utf_space_en_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em[0] && character.byte_2 == f_utf_space_em[1] && character.byte_3 == f_utf_space_em[2]) {
+      if (utf[0] == f_utf_space_em[0] && utf[1] == f_utf_space_em[1] && utf[2] == f_utf_space_em[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_quad[0] && character.byte_2 == f_utf_space_em_quad[1] && character.byte_3 == f_utf_space_em_quad[2]) {
+      if (utf[0] == f_utf_space_em_quad[0] && utf[1] == f_utf_space_em_quad[1] && utf[2] == f_utf_space_em_quad[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_three[0] && character.byte_2 == f_utf_space_em_per_three[1] && character.byte_3 == f_utf_space_em_per_three[2]) {
+      if (utf[0] == f_utf_space_em_per_three[0] && utf[1] == f_utf_space_em_per_three[1] && utf[2] == f_utf_space_em_per_three[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_four[0] && character.byte_2 == f_utf_space_em_per_four[1] && character.byte_3 == f_utf_space_em_per_four[2]) {
+      if (utf[0] == f_utf_space_em_per_four[0] && utf[1] == f_utf_space_em_per_four[1] && utf[2] == f_utf_space_em_per_four[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_em_per_six[0] && character.byte_2 == f_utf_space_em_per_six[1] && character.byte_3 == f_utf_space_em_per_six[2]) {
+      if (utf[0] == f_utf_space_em_per_six[0] && utf[1] == f_utf_space_em_per_six[1] && utf[2] == f_utf_space_em_per_six[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_figure[0] && character.byte_2 == f_utf_space_figure[1] && character.byte_3 == f_utf_space_figure[2]) {
+      if (utf[0] == f_utf_space_figure[0] && utf[1] == f_utf_space_figure[1] && utf[2] == f_utf_space_figure[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_punctuation[0] && character.byte_2 == f_utf_space_punctuation[1] && character.byte_3 == f_utf_space_punctuation[2]) {
+      if (utf[0] == f_utf_space_punctuation[0] && utf[1] == f_utf_space_punctuation[1] && utf[2] == f_utf_space_punctuation[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_thin[0] && character.byte_2 == f_utf_space_thin[1] && character.byte_3 == f_utf_space_thin[2]) {
+      if (utf[0] == f_utf_space_thin[0] && utf[1] == f_utf_space_thin[1] && utf[2] == f_utf_space_thin[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_hair[0] && character.byte_2 == f_utf_space_hair[1] && character.byte_3 == f_utf_space_hair[2]) {
+      if (utf[0] == f_utf_space_hair[0] && utf[1] == f_utf_space_hair[1] && utf[2] == f_utf_space_hair[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_separator_line[0] && character.byte_2 == f_utf_space_separator_line[1] && character.byte_3 == f_utf_space_separator_line[2]) {
+      if (utf[0] == f_utf_space_separator_line[0] && utf[1] == f_utf_space_separator_line[1] && utf[2] == f_utf_space_separator_line[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_separator_paragraph[0] && character.byte_2 == f_utf_space_separator_paragraph[1] && character.byte_3 == f_utf_space_separator_paragraph[2]) {
+      if (utf[0] == f_utf_space_separator_paragraph[0] && utf[1] == f_utf_space_separator_paragraph[1] && utf[2] == f_utf_space_separator_paragraph[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_ogham[0] && character.byte_2 == f_utf_space_ogham[1] && character.byte_3 == f_utf_space_ogham[2]) {
+      if (utf[0] == f_utf_space_ogham[0] && utf[1] == f_utf_space_ogham[1] && utf[2] == f_utf_space_ogham[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_ideographic[0] && character.byte_2 == f_utf_space_ideographic[1] && character.byte_3 == f_utf_space_ideographic[2]) {
+      if (utf[0] == f_utf_space_ideographic[0] && utf[1] == f_utf_space_ideographic[1] && utf[2] == f_utf_space_ideographic[2]) {
          return f_true;
        }
  
-      if (character.byte_1 == f_utf_space_medium_mathematical[0] && character.byte_2 == f_utf_space_medium_mathematical[1] && character.byte_3 == f_utf_space_medium_mathematical[2]) {
+      if (utf[0] == f_utf_space_medium_mathematical[0] && utf[1] == f_utf_space_medium_mathematical[1] && utf[2] == f_utf_space_medium_mathematical[2]) {
          return f_true;
        }
  
@@ -660,43 +664,49 @@ extern "C" {
    }
  #endif // _di_f_utf_is_whitespace_character_
  
-#ifndef _di_f_utf_string_to_character_
-  f_return_status f_utf_string_to_character(const f_string character_string, const f_u_short max_width, f_utf_character *utf_character) {
+#ifndef _di_f_utf_char_to_character_
+  f_return_status f_utf_char_to_character(const f_string character, const f_u_short max_width, f_utf_character *utf_character) {
      #ifndef _di_level_0_parameter_checking_
        if (max_width < 1) return f_error_set_error(f_invalid_parameter);
+      if (utf_character == 0) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    f_u_short width = f_macro_utf_byte_width_is(character_string[0]);
+    f_u_short width = f_macro_utf_byte_width(*character);
+
+    if (width == 1) {
+      *utf_character = f_macro_utf_character_from_char_1(character[0]);
+      return f_none;
+    }
  
-    if (width >= max_width) {
+    if (width > max_width) {
        return f_error_set_error(f_failure);
      }
  
      memset(utf_character, 0, sizeof(f_utf_character));
  
-    utf_character->byte_1 = character_string[0];
+    *utf_character |= f_macro_utf_character_to_char_1(character[0]);
  
      if (width < 2) {
        return f_none;
      }
  
-    utf_character->byte_2 = character_string[1];
+    *utf_character |= f_macro_utf_character_to_char_2(character[1]);
  
      if (width == 2) {
        return f_none;
      }
  
-    utf_character->byte_3 = character_string[2];
+    *utf_character |= f_macro_utf_character_to_char_3(character[2]);
  
      if (width == 3) {
        return f_none;
      }
  
-    utf_character->byte_4 = character_string[3];
+    *utf_character |= f_macro_utf_character_to_char_4(character[3]);
  
      return f_none;
    }
-#endif // _di_f_utf_string_to_character_
+#endif // _di_f_utf_char_to_character_
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/level_0/f_utf/c/utf.h b/level_0/f_utf/c/utf.h

index e3569ec4fcb07300ce28fe2903a3d20cf486ad11..29ec202c1a7d9930d0b3ca2eed5a56ad61bca24f 100644 (file)
--- a/level_0/f_utf/c/utf.h
+++ b/level_0/f_utf/c/utf.h
@@ -61,28 +61,6 @@ extern "C" {
  #endif // _di_f_utf_bom_
  
  /**
- * Provide a basic UTF-8 character.
- *
- * This is intended to be used so that a single path parameter can be passed to a function instead of an array of characters.
- */
-#ifndef _di_f_utf_character_
-  typedef struct {
-    char byte_1;
-    char byte_2;
-    char byte_3;
-    char byte_4;
-  } f_utf_character;
-
-  #define f_utf_character_initialize \
-  { \
-    '\0', \
-    '\0', \
-    '\0', \
-    '\0', \
-  }
-#endif // _di_f_utf_char_
-
-/**
   * Define the UTF-8 bytes.
   *
   * The bytes are for checking a single 8-bit character value (specifically, checking the first bits).
@@ -94,7 +72,6 @@ extern "C" {
   * The f_macro_utf_byte_is_* macros are used to determine a width of the character (either 1, 2, 3, or 4, respectively).
   *
   * The f_macro_utf_byte_width macro determines a width of the character.
- *
   * The f_macro_utf_byte_width_is is identical to f_macro_utf_byte_width, except it returns 0 when character is not UTF-8.
   */
  #ifndef _di_f_utf_byte_
@@ -120,6 +97,53 @@ extern "C" {
  #endif // _di_f_utf_byte_
  
  /**
+ * Provide a basic UTF-8 character as a single 4-byte variable.
+ *
+ * This is intended to be used when a single variable is desired to represent a 1-byte, 2-byte, 3-byte, or even 4-byte character.
+ *
+ * The byte structure is intended to be read left to right.
+ *
+ * The f_macro_utf_character_mask_byte_* are used to get the entire character set for a given width.
+ *
+ * The f_macro_utf_character_mask_char_* are used to get a specific UTF-8 block as a single character range.
+ *
+ * The f_macro_utf_character_to_char_* are used to convert a f_utf_character into a char, for a given 8-bit block.
+ *
+ * The f_macro_utf_character_from_char_* are used to convert a char into part of a f_utf_character, for a given 8-bit block.
+ *
+ * The f_macro_utf_character_width is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width.
+ * The f_macro_utf_character_width_is is used to determine the width of the UTF-8 character based on f_macro_utf_byte_width_is.
+ */
+#ifndef _di_f_utf_character_
+  typedef uint32_t f_utf_character;
+
+  #define f_macro_utf_character_mask_bom 0xefbbbf00 // 1110 1111, 1011 1011, 1011 1111, 0000 0000
+
+  #define f_macro_utf_character_mask_byte_1 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+  #define f_macro_utf_character_mask_byte_2 0xffff0000 // 1111 1111, 1111 1111, 0000 0000, 0000 0000
+  #define f_macro_utf_character_mask_byte_3 0xffffff00 // 1111 1111, 1111 1111, 1111 1111, 0000 0000
+  #define f_macro_utf_character_mask_byte_4 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111
+
+  #define f_macro_utf_character_mask_char_1 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000
+  #define f_macro_utf_character_mask_char_2 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000
+  #define f_macro_utf_character_mask_char_3 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000
+  #define f_macro_utf_character_mask_char_4 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111
+
+  #define f_macro_utf_character_to_char_1(character) ((f_macro_utf_character_mask_char_1 & character) >> 24) // grab first byte.
+  #define f_macro_utf_character_to_char_2(character) ((f_macro_utf_character_mask_char_2 & character) >> 16) // grab second byte.
+  #define f_macro_utf_character_to_char_3(character) ((f_macro_utf_character_mask_char_3 & character) >> 8) // grab third byte.
+  #define f_macro_utf_character_to_char_4(character) (f_macro_utf_character_mask_char_4 & character) // grab fourth byte.
+
+  #define f_macro_utf_character_from_char_1(character) (character << 24) // shift the first byte.
+  #define f_macro_utf_character_from_char_2(character) (character << 16) // shift the second byte.
+  #define f_macro_utf_character_from_char_3(character) (character << 8) // shift the third byte.
+  #define f_macro_utf_character_from_char_4(character) (character) // shift the fourth byte.
+
+  #define f_macro_utf_character_width(character) (f_macro_utf_byte_width(f_macro_utf_character_to_char_1(character)))
+  #define f_macro_utf_character_width_is(character) (f_macro_utf_byte_width_is(f_macro_utf_character_to_char_1(character)))
+#endif // _di_f_utf_character_
+
+/**
   * Define the UTF-8 general whitespace codes.
   *
   * These are integers representing character codes that represent types of spaces.
@@ -223,9 +247,9 @@ extern "C" {
   *   f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_is_bom_string_
-  extern f_return_status f_utf_is_bom_string(const f_string character, const f_u_short max_width);
-#endif // _di_f_utf_is_bom_string_
+#ifndef _di_f_utf_is_bom_
+  extern f_return_status f_utf_is_bom(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_bom_
  
  /**
   * Check to see if the entire byte block of the character is a UTF-8 printable character.
@@ -245,9 +269,9 @@ extern "C" {
   *   f_maybe (with error bit) if this could be a graph but width is not long enough.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_is_graph_string_
-  extern f_return_status f_utf_is_graph_string(const f_string character, const f_u_short max_width);
-#endif // _di_f_utf_is_graph_string_
+#ifndef _di_f_utf_is_graph_
+  extern f_return_status f_utf_is_graph(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_graph_
  
  /**
   * Check to see if the entire byte block of the character is a UTF-8 whitespace or substitute character.
@@ -267,9 +291,9 @@ extern "C" {
   *   f_maybe (with error bit) if this could be a whitespace or substitute but width is not long enough.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_is_space_string_
-  extern f_return_status f_utf_is_space_string(const f_string character, const f_u_short max_width);
-#endif // _di_f_utf_is_space_string_
+#ifndef _di_f_utf_is_space_
+  extern f_return_status f_utf_is_space(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_space_
  
  /**
   * Check to see if the entire byte block of the character is a UTF-8 whitespace substitute character.
@@ -289,9 +313,9 @@ extern "C" {
   *   f_maybe (with error bit) if this could be a substitute but width is not long enough.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_is_substitute_string_
-  extern f_return_status f_utf_is_substitute_string(const f_string character, const f_u_short max_width);
-#endif // _di_f_utf_is_substitute_string_
+#ifndef _di_f_utf_is_substitute_
+  extern f_return_status f_utf_is_substitute(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_substitute_
  
  /**
   * Check to see if the entire byte block of the character is a UTF-8 general whitespace character.
@@ -311,9 +335,9 @@ extern "C" {
   *   f_maybe (with error bit) if this could be a whitespace but width is not long enough.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_is_whitespace_string_
-  extern f_return_status f_utf_is_whitespace_string(const f_string character, const f_u_short max_width);
-#endif // _di_f_utf_is_whitespace_string_
+#ifndef _di_f_utf_is_whitespace_
+  extern f_return_status f_utf_is_whitespace(const f_string character, const f_u_short max_width);
+#endif // _di_f_utf_is_whitespace_
  
  /**
   * Check to see if the entire byte block of the character is a UTF-8 BOM.
@@ -404,7 +428,7 @@ extern "C" {
   *
   * This will also convert ASCII characters.
   *
- * @param character_string
+ * @param character
   *   The character string to be converted to the f_utf_character type.
   *   There must be enough space allocated to convert against, as limited by max_width.
   * @param max_width
@@ -419,9 +443,9 @@ extern "C" {
   *   f_failure (with error bit) if width is not long enough to convert.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
   */
-#ifndef _di_f_utf_string_to_character_
-  extern f_return_status f_utf_string_to_character(const f_string character_string, const f_u_short max_width, f_utf_character *utf_character);
-#endif // _di_f_utf_string_to_character_
+#ifndef _di_f_utf_char_to_character_
+  extern f_return_status f_utf_char_to_character(const f_string character, const f_u_short max_width, f_utf_character *utf_character);
+#endif // _di_f_utf_char_to_character_
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/level_1/fl_fss/c/fss.c b/level_1/fl_fss/c/fss.c

index 73edd321c3979dcee95fcc0ade5c124ae6d3f711..ac42d115997d650ce24afd5bcdc2925c95a71298 100644 (file)
--- a/level_1/fl_fss/c/fss.c
+++ b/level_1/fl_fss/c/fss.c
@@ -49,7 +49,7 @@ extern "C" {
  
      // A single UTF-8 BOM is allowed to exist before the valid FSS identifier.
      if (buffer.used > 3) {
-      f_status status = f_utf_is_bom_string(buffer.string, 4);
+      f_status status = f_utf_is_bom(buffer.string, 4);
  
        if (f_error_is_error(status)) {
          return f_error_set_error(fl_fss_no_header);
@@ -297,7 +297,7 @@ extern "C" {
        max_width = buffer.used - input.start;
      }
  
-    f_status status = f_utf_is_space_string(buffer.string + input.start, max_width);
+    f_status status = f_utf_is_space(buffer.string + input.start, max_width);
  
      if (f_error_is_error(status)) {
        return status;
@@ -336,7 +336,7 @@ extern "C" {
        max_width = buffer.used - input.start;
      }
  
-    f_status status = f_utf_is_space_string(buffer.string + input.start, max_width);
+    f_status status = f_utf_is_space(buffer.string + input.start, max_width);
  
      if (f_error_is_error(status)) {
        return status;
@@ -370,8 +370,8 @@ extern "C" {
        if (buffer.string[input->start] != f_fss_delimit_placeholder) {
          max_width = (input->stop - input->start) + 1;
  
-        if (f_utf_is_space_string(buffer.string +input->start, max_width) != f_true) {
-          if (f_utf_is_bom_string(buffer.string + input->start, max_width) != f_true) {
+        if (f_utf_is_space(buffer.string +input->start, max_width) != f_true) {
+          if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) {
              break;
            }
          }
@@ -402,8 +402,8 @@ extern "C" {
        if (buffer.string[input->start] != f_fss_delimit_placeholder) {
          max_width = (input->stop - input->start) + 1;
  
-        if (f_utf_is_space_string(buffer.string + input->start, max_width) != f_true) {
-          if (f_utf_is_bom_string(buffer.string + input->start, max_width) != f_true) {
+        if (f_utf_is_space(buffer.string + input->start, max_width) != f_true) {
+          if (f_utf_is_bom(buffer.string + input->start, max_width) != f_true) {
              break;
            }
          }
diff --git a/level_1/fl_fss/c/fss.h b/level_1/fl_fss/c/fss.h

index 43a896cfaceaa21367db54f6514e1f262960964e..c781db2e40d25bdcd02a9a85d7ed1b95715f0e79 100644 (file)
--- a/level_1/fl_fss/c/fss.h
+++ b/level_1/fl_fss/c/fss.h
@@ -47,10 +47,10 @@ extern "C" {
   *
   * @return
   *   f_none on success.
- *   f_none_on_stop if the stop point is reached before all steps are completed.
+ *   f_none_on_stop if the stop location is reached before all steps are completed.
   *   f_none_on_eos if the end of buffer is reached before all steps are completed.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
- *   f_incomplete_utf_on_stop (with error bit) if the stop point is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
   *   f_incomplete_utf_on_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   */
  #ifndef _di_fl_fss_decrement_buffer_
@@ -118,10 +118,10 @@ extern "C" {
   *
   * @return
   *   f_none on success.
- *   f_none_on_stop if the stop point is reached before all steps are completed.
+ *   f_none_on_stop if the stop location is reached before all steps are completed.
   *   f_none_on_eos if the end of buffer is reached before all steps are completed.
   *   f_invalid_parameter (with error bit) if a parameter is invalid.
- *   f_incomplete_utf_on_stop (with error bit) if the stop point is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
   *   f_incomplete_utf_on_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   */
  #ifndef _di_fl_fss_increment_buffer_
diff --git a/level_1/fl_fss/c/fss_basic.c b/level_1/fl_fss/c/fss_basic.c

index cdd12cfbc3f93e02c8aab3cac7cf46703b721ed7..3a3adb3f41f33da91f61ff96f23224e546151887 100644 (file)
--- a/level_1/fl_fss/c/fss_basic.c
+++ b/level_1/fl_fss/c/fss_basic.c
@@ -435,7 +435,7 @@ extern "C" {
        ++input->start;
      } while (f_true);
  
-    // Save the stop point
+    // Save the stop location
      found->array[found->used].stop = input->start - 1;
      found->used++;
  
diff --git a/level_1/fl_serialized/c/serialized.c b/level_1/fl_serialized/c/serialized.c

index b81da09e114ce6ea9f6dc5ad2ea310141ea4a9fa..4d77ce579861f22b1e74462d79e06a7b3a3c5dc3 100644 (file)
--- a/level_1/fl_serialized/c/serialized.c
+++ b/level_1/fl_serialized/c/serialized.c
@@ -12,9 +12,8 @@ extern "C" {
  
      f_status status = f_none;
  
-
      if (serialized->used + value.used + 1 >= serialized->size) {
-      f_resize_dynamic_string(status, (*serialized), serialized->size + value.used + f_serialized_default_allocation_step);
+      f_resize_dynamic_string(status, (*serialized), serialized->size + value.used + 1);
  
        if (f_error_is_error(status)) return status;
      }
@@ -33,8 +32,8 @@ extern "C" {
    }
  #endif // _di_fl_serialize_simple_
  
-#ifndef _di_fl_unserialize_simple_
-  f_return_status fl_unserialize_simple(const f_dynamic_string serialized, f_string_locations *locations) {
+#ifndef _di_fl_unserialize_simple_map_
+  f_return_status fl_unserialize_simple_map(const f_dynamic_string serialized, f_string_locations *locations) {
      #ifndef _di_level_0_parameter_checking_
        if (locations == 0) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -44,15 +43,20 @@ extern "C" {
      f_array_length i = 0;
      f_array_length start = 0;
  
-    while (i <= serialized.used) {
+    f_u_short width = 0;
+
+    while (i < serialized.used) {
+      width = f_macro_utf_byte_width(serialized.string[i]);
+
        if (serialized.string[i] == f_serialized_simple_splitter || i == serialized.used) {
-        if (locations->used + 1 >= locations->size) {
+        if (locations->used + width >= locations->size) {
            f_resize_string_locations(status, (*locations), locations->size + f_serialized_default_allocation_step);
  
            if (f_error_is_error(status)) return status;
          }
  
          if (start == i) {
+          // provide an invalid start to stop range to communicate that there is no data.
            locations->array[locations->used].start = 1;
            locations->array[locations->used].stop = 0;
            locations->used++;
@@ -63,18 +67,25 @@ extern "C" {
            locations->used++;
          }
  
-        start = i + 1;
+        if (i + width > serialized.used) {
+          return f_error_set_error(f_incomplete_utf_on_eos);
+        }
+
+        start = i + width;
+      }
+      else if (i + width > serialized.used) {
+        return f_error_set_error(f_incomplete_utf_on_eos);
        }
  
-      i++;
+      i += width;
      } // while
  
      return f_none;
    }
-#endif // _di_fl_unserialize_simple_
+#endif // _di_fl_unserialize_simple_map_
  
-#ifndef _di_fl_unserialize_simple_get_
-  f_return_status fl_unserialize_simple_get(const f_dynamic_string serialized, const f_array_length index, f_string_location *location) {
+#ifndef _di_fl_unserialize_simple_find_
+  f_return_status fl_unserialize_simple_find(const f_dynamic_string serialized, const f_array_length index, f_string_location *location) {
      #ifndef _di_level_0_parameter_checking_
        if (location == 0) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_0_parameter_checking_
@@ -82,33 +93,51 @@ extern "C" {
      f_status status = f_none;
  
      f_array_length i = 0;
+    f_array_length start = 0;
      f_array_length current = 0;
  
-    location->start = 1;
-    location->stop = 0;
+    f_u_short width = 0;
  
      while (i < serialized.used) {
-      if (current == index) {
-        if (location->start > location->stop) {
-          location->start = i;
-          location->stop = i;
+      width = f_macro_utf_byte_width(serialized.string[i]);
+
+      if (serialized.string[i] == f_serialized_simple_splitter) {
+        if (current == index) {
+          if (start == i) {
+            // provide an invalid start to stop range to communicate that there is no data.
+            location->start = 1;
+            location->stop = 0;
+          }
+          else {
+            location->start = start;
+            location->stop = i - 1;
+          }
+
+          return f_none;
          }
  
-        if (serialized.string[i] == f_serialized_simple_splitter) {
+        start = i + width;
+        current++;
+      }
+      else if (i == serialized.used) {
+        if (current == index) {
+          location->start = start;
            location->stop = i - 1;
-          break;
          }
+
+        return f_none_on_eos;
        }
-      else if (serialized.string[i] == f_serialized_simple_splitter) {
-        current++;
+
+      if (i + width > serialized.used) {
+        return f_error_set_error(f_incomplete_utf_on_eos);
        }
  
-      i++;
+      i += width;
      } // while
  
-    return f_none;
+    return f_no_data_on_eos;
    }
-#endif // _di_fl_unserialize_simple_get_
+#endif // _di_fl_unserialize_simple_find_
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/level_1/fl_serialized/c/serialized.h b/level_1/fl_serialized/c/serialized.h

index db4882d85dbde516ee42001a55947d2fdf1e1111..3294f30625830e14c202eb86143557b02ca6eb6c 100644 (file)
--- a/level_1/fl_serialized/c/serialized.h
+++ b/level_1/fl_serialized/c/serialized.h
@@ -20,31 +20,92 @@
  #include <level_0/serialized.h>
  #include <level_0/strings.h>
  #include <level_0/types.h>
+#include <level_0/utf.h>
  
  #ifdef __cplusplus
  extern "C" {
  #endif
  
+/**
+ * Serialize a string using the Simple serialize algorithm.
+ *
+ * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin".
+ *
+ * To generate the above example, this would be called 3 times, with the following strings:
+ *   1) value = "/bin", then: PATH="/bin".
+ *   2) value = "/sbin", then: PATH="/bin:/sbin".
+ *   3) value = "/usr/bin", then: PATH="/bin:/sbin:/usr/bin".
+ *
+ * @param value
+ *   The string to append onto serialized.
+ * @param serialized
+ *   The dynamic string that represents a serialized set of strings.
+ *
+ * @return
+ *   f_none on success.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
  #ifndef _di_fl_serialize_simple_
-  /**
-   * this function will append a string to the serialize.
-   */
    extern f_return_status fl_serialize_simple(const f_dynamic_string value, f_dynamic_string *serialized);
  #endif // _di_fl_serialize_simple_
  
-#ifndef _di_fl_unserialize_simple_
-  /**
-   * this function will unserialize a serialized string and store the results in an array of strings.
-   */
-  extern f_return_status fl_unserialize_simple(const f_dynamic_string serialized, f_string_locations *locations);
-#endif // _di_fl_unserialize_
-
-#ifndef _di_fl_unserialize_simple_get_
-  /**
-   * this function will pull a single serialized value from the serialized string at the given index.
-   */
-  extern f_return_status fl_unserialize_simple_get(const f_dynamic_string serialized, const f_array_length index, f_string_location *location);
-#endif // _di_fl_unserialize_simple_get_
+// @todo: implement fl_unserialize_simple() such that a new array of strings is allocated.
+
+/**
+ * Identify string positions within a serialized string using the Simple serialize algorithm.
+ *
+ * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin".
+ *
+ * After processing the above example, there would be the following positions:
+ *   1) start = 0, stop = 3.
+ *   2) start = 5, stop = 9.
+ *   3) start = 11, stop = 18.
+ *
+ * @param serialized
+ *   A serialized string to de-serialize.
+ * @param locations
+ *   The locations within the serialized string representing distinct separate strings.
+ *
+ * @return
+ *   f_none on success.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
+#ifndef _di_fl_unserialize_simple_map_
+  extern f_return_status fl_unserialize_simple_map(const f_dynamic_string serialized, f_string_locations *locations);
+#endif // _di_fl_unserialize_simple_map_
+
+/**
+ * Unserialize a specific string using the Simple serialize algorithm.
+ *
+ * The simple Serialize algorithm is akin to the PATH environment variable, example: PATH="/bin:/sbin:/usr/bin".
+ *
+ * After processing the above example, there would be the following positions, for the given index:
+ *   1) with index = 0, start = 0, stop = 3.
+ *   2) with index = 1, start = 5, stop = 9.
+ *   3) with index = 2, start = 11, stop = 18.
+ *
+ * @param serialized
+ *   A serialized string to de-serialize.
+ * @param index
+ *   An index position within the serialized string to get the deserialized positions of.
+ * @param location
+ *   A location within the serialized string representing the string at the given index.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eos on success at end of string.
+ *   f_no_data_on_eos if end of string reached before index was reached.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_fl_unserialize_simple_find_
+  extern f_return_status fl_unserialize_simple_find(const f_dynamic_string serialized, const f_array_length index, f_string_location *location);
+#endif // _di_fl_unserialize_simple_find_
+
+// @todo: implement fl_unserialize_simple_get() such that a new string is allocated, if found.
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/level_1/fl_serialized/data/build/dependencies b/level_1/fl_serialized/data/build/dependencies

index 7a98ab941145aa2081e97045effefff90b26cba1..a34ebfa74ae5010c8b6f52b3870028bd3edded23 100644 (file)
--- a/level_1/fl_serialized/data/build/dependencies
+++ b/level_1/fl_serialized/data/build/dependencies
@@ -3,3 +3,4 @@ f_errors
  f_strings
  f_serialized
  f_memory
+f_utf
diff --git a/level_1/fl_serialized/data/build/settings b/level_1/fl_serialized/data/build/settings

index 6bab1364814a71dfd6805b2f7f3fbe7559e886f8..ddc683db3c0111140eb1148204718c460c6bf0fc 100644 (file)
--- a/level_1/fl_serialized/data/build/settings
+++ b/level_1/fl_serialized/data/build/settings
@@ -10,9 +10,9 @@ version_micro 0
  build_compiler gcc
  build_linker ar
  build_libraries -lc
-build_libraries_fll -lf_memory
+build_libraries_fll -lf_memory -lf_utf
  build_sources_library serialized.c
-build_sources_program 
+build_sources_program
  build_sources_headers serialized.h
  build_sources_bash
  build_sources_settings
diff --git a/level_1/fl_strings/c/strings.c b/level_1/fl_strings/c/strings.c

index a7d83872af138ff2b61875f3d1e90d23e412888a..9346025387579d7b1b70c0a6a8a1bbfffd398f20 100644 (file)
--- a/level_1/fl_strings/c/strings.c
+++ b/level_1/fl_strings/c/strings.c
@@ -4,32 +4,34 @@
  extern "C" {
  #endif
  
-// TODO: this file needs to be rewriten with UTF-8 support.
-
  #ifndef _di_fl_rip_string_
-  f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location position, f_dynamic_string *results) {
+  f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location location, f_dynamic_string *result) {
      #ifndef _di_level_1_parameter_checking_
-      if (results == 0) return f_error_set_error(f_invalid_parameter);
-      if (position.start < 0) return f_error_set_error(f_invalid_parameter);
-      if (position.stop < position.start) return f_error_set_error(f_invalid_parameter);
+      if (location.start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location.stop < location.start) return f_error_set_error(f_invalid_parameter);
        if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
-      if (position.start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+      if (location.start >= buffer.used) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_1_parameter_checking_
  
      // the start and stop point are inclusive locations, and therefore start - stop is actually 1 too few locations
-    f_string_length size = position.stop - position.start + 1;
+    f_string_length size = (location.stop - location.start) + 1;
  
      if (size > 0) {
        f_status status = f_none;
  
-      f_resize_dynamic_string(status, (*results), size);
+      if (result == 0) {
+        f_new_dynamic_string(status, (*result), size);
+      }
+      else {
+        f_resize_dynamic_string(status, (*result), size);
+      }
  
        if (f_error_is_error(status)) {
          return status;
        }
  
-      memcpy(results->string, buffer.string + position.start, sizeof(char) * size);
-      results->used = size;
+      memcpy(result->string, buffer.string + location.start, sizeof(char) * size);
+      result->used = size;
  
        return f_none;
      }
@@ -38,73 +40,268 @@ extern "C" {
    }
  #endif // _di_fl_rip_string_
  
-#ifndef _di_fl_seek_line_past_non_graph_
-  f_return_status fl_seek_line_past_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder) {
+#ifndef _di_fl_seek_line_until_graph_
+  f_return_status fl_seek_line_until_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder) {
      #ifndef _di_level_1_parameter_checking_
-      if (position == 0) return f_error_set_error(f_invalid_parameter);
-      if (position->start < 0) return f_error_set_error(f_invalid_parameter);
-      if (position->stop < position->start) return f_error_set_error(f_invalid_parameter);
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
        if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
-      if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_1_parameter_checking_
  
-    while (!isgraph(buffer.string[position->start]) || buffer.string[position->start] == placeholder) {
-      if (buffer.string[position->start] == f_eol) return f_none;
+    f_status status = f_none;
+    f_u_short width = 0;
+
+    f_string_length max_width = (location->stop - location->start) + 1;
+
+    if (max_width > buffer.used - location->start) {
+      max_width = buffer.used - location->start;
+    }
+
+    while (buffer.string[location->start] == placeholder || (!isgraph(buffer.string[location->start]) && (status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false)) {
+      if (buffer.string[location->start] == f_eol) return f_none_on_eol;
+
+      width = f_macro_utf_byte_width(buffer.string[location->start]);
+
+      if (width > 1) {
+        if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos);
+        if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop);
+      }
+
+      location->start += width;
+
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
  
-      ++position->start;
+      max_width = (location->stop - location->start) + 1;
  
-      if (position->start >= buffer.used) return f_none_on_eos;
-      if (position->start > position->stop) return f_none_on_stop;
+      if (max_width > buffer.used - location->start) {
+        max_width = buffer.used - location->start;
+      }
      } // while
  
+    if (f_error_is_error(status)) {
+      return status;
+    }
+
      return f_none;
    }
-#endif // _di_fl_seek_line_past_non_graph_
+#endif // _di_fl_seek_line_until_graph_
  
  #ifndef _di_fl_seek_line_until_non_graph_
-  f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder) {
+  f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder) {
      #ifndef _di_level_1_parameter_checking_
-      if (position->start < 0) return f_error_set_error(f_invalid_parameter);
-      if (position->stop < position->start) return f_error_set_error(f_invalid_parameter);
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
        if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
-      if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_1_parameter_checking_
  
-    while (isgraph(buffer.string[position->start]) || buffer.string[position->start] == placeholder) {
-      if (buffer.string[position->start] == f_eol) return f_none;
+    f_status status = f_none;
+    f_u_short width = 0;
+
+    f_string_length max_width = (location->stop - location->start) + 1;
+
+    if (max_width > buffer.used - location->start) {
+      max_width = buffer.used - location->start;
+    }
+
+    while (buffer.string[location->start] == placeholder || (isgraph(buffer.string[location->start]) && (status = f_utf_is_space(buffer.string + location->start, max_width)) == f_false)) {
+      if (buffer.string[location->start] == f_eol) return f_none_on_eol;
+
+      width = f_macro_utf_byte_width(buffer.string[location->start]);
+
+      if (width > 1) {
+        if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos);
+        if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop);
+      }
+
+      location->start += width;
  
-      ++position->start;
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
  
-      if (position->start >= buffer.used) return f_none_on_eos;
-      if (position->start > position->stop) return f_none_on_stop;
+      max_width = (location->stop - location->start) + 1;
+
+      if (max_width > buffer.used - location->start) {
+        max_width = buffer.used - location->start;
+      }
      } // while
  
+    if (f_error_is_error(status)) {
+      return status;
+    }
+
      return f_none;
    }
  #endif // _di_fl_seek_line_until_non_graph_
  
-#ifndef _di_fl_seek_to_
-  f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *position, const char seek_to_this) {
+#ifndef _di_fl_seek_line_to_
+  f_return_status fl_seek_line_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this) {
      #ifndef _di_level_1_parameter_checking_
-      if (position->start < 0) return f_error_set_error(f_invalid_parameter);
-      if (position->stop < position->start) return f_error_set_error(f_invalid_parameter);
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
        if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
-      if (position->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
      #endif // _di_level_1_parameter_checking_
  
-    while (buffer.string[position->start] != seek_to_this) {
-      if (buffer.string[position->start] == f_eol) return f_none;
+    while (buffer.string[location->start] != seek_to_this) {
+      if (buffer.string[location->start] == f_eol) return f_none_on_eol;
  
-      ++position->start;
+      location->start++;
  
-      if (position->start >= buffer.used) return f_none_on_eos;
-      if (position->start > position->stop) return f_none_on_stop;
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
+    } // while
+
+    return f_none;
+  }
+#endif // _di_fl_seek_line_to_
+
+#ifndef _di_fl_seek_line_to_character_
+  f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    const f_u_short seek_width = f_macro_utf_character_width(seek_to_this);
+
+    f_status status = f_none;
+
+    f_u_short width = 0;
+
+    f_string_length max_width = 0;
+
+    while (location->start < buffer.used) {
+      max_width = (location->stop - location->start) + 1;
+
+      if (max_width > buffer.used - location->start) {
+        max_width = buffer.used - location->start;
+      }
+
+      width = f_macro_utf_byte_width(buffer.string[location->start]);
+
+      if (width == 1) {
+        if (buffer.string[location->start] == f_eol) return f_none_on_eol;
+
+        if (seek_width == width) {
+          if (buffer.string[location->start] == seek_to_this) return f_none;
+        }
+      }
+      else {
+        if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos);
+        if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop);
+
+        if (width == seek_width) {
+          f_utf_character character = 0;
+          status = f_utf_char_to_character(buffer.string + location->start, max_width, &character);
+
+          if (f_error_is_error(status)) {
+            return status;
+          }
+
+          if (character == seek_to_this) {
+            return f_none;
+          }
+        }
+      }
+
+      location->start += width;
+
+      if (location->start >= location->stop) return f_none_on_stop;
+    } // while
+
+    return f_none_on_eos;
+  }
+#endif // _di_fl_seek_line_to_character_
+
+#ifndef _di_fl_seek_to_
+  f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    while (buffer.string[location->start] != seek_to_this) {
+      location->start++;
+
+      if (location->start >= buffer.used) return f_none_on_eos;
+      if (location->start > location->stop) return f_none_on_stop;
      } // while
  
      return f_none;
    }
  #endif // _di_fl_seek_to_
  
+#ifndef _di_fl_seek_to_character_
+  f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this) {
+    #ifndef _di_level_1_parameter_checking_
+      if (location == 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start < 0) return f_error_set_error(f_invalid_parameter);
+      if (location->stop < location->start) return f_error_set_error(f_invalid_parameter);
+      if (buffer.used <= 0) return f_error_set_error(f_invalid_parameter);
+      if (location->start >= buffer.used) return f_error_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    const f_u_short seek_width = f_macro_utf_character_width(seek_to_this);
+
+    f_status status = f_none;
+
+    f_u_short width = 0;
+
+    f_string_length max_width = 0;
+
+    while (location->start < buffer.used) {
+      max_width = (location->stop - location->start) + 1;
+
+      if (max_width > buffer.used - location->start) {
+        max_width = buffer.used - location->start;
+      }
+
+      width = f_macro_utf_byte_width(buffer.string[location->start]);
+
+      if (width == 1) {
+        if (seek_width == width) {
+          if (buffer.string[location->start] == seek_to_this) return f_none;
+        }
+      }
+      else {
+        if (location->start + width >= buffer.used) return f_error_set_error(f_incomplete_utf_on_eos);
+        if (location->start + width > location->stop) return f_error_set_error(f_incomplete_utf_on_stop);
+
+        if (width == seek_width) {
+          f_utf_character character = 0;
+          status = f_utf_char_to_character(buffer.string + location->start, max_width, &character);
+
+          if (f_error_is_error(status)) {
+            return status;
+          }
+
+          if (character == seek_to_this) {
+            return f_none;
+          }
+        }
+      }
+
+      location->start += width;
+
+      if (location->start >= location->stop) return f_none_on_stop;
+    } // while
+
+    return f_none_on_eos;
+  }
+#endif // _di_fl_seek_to_character_
+
  #ifndef _di_fl_compare_strings_
    f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2) {
      #ifndef _di_level_1_parameter_checking_
@@ -115,22 +312,25 @@ extern "C" {
      f_string_length i1 = 0;
      f_string_length i2 = 0;
  
-    f_string_length stop1 = length1;
-    f_string_length stop2 = length2;
+    for (; i1 < length1 && i2 < length2; i1++, i2++) {
+      // skip past NULL characters in string1.
+      while (i1 < length1 && string1[i1] == f_eos) i1++;
+      if (i1 == length1) break;
  
-    for (; i1 < stop1 && i2 < stop2; i1++, i2++) {
-      while (i1 < stop1 && string1[i1] == f_eos) i1++;
-      while (i2 < stop2 && string2[i2] == f_eos) i2++;
+      // skip past NULL characters in string2.
+      while (i2 < length2 && string2[i2] == f_eos) i2++;
+      if (i2 == length2) break;
  
        if (string1[i1] != string2[i2]) return f_not_equal_to;
      } // for
  
-    while (i1 < stop1) {
+    // only return f_equal_to if all remaining characters are NULL.
+    while (i1 < length1) {
        if (string1[i1] != f_eos) return f_not_equal_to;
        i1++;
      } // while
  
-    while (i2 < stop2) {
+    while (i2 < length2) {
        if (string2[i2] != f_eos) return f_not_equal_to;
        i2++;
      } // while
@@ -149,22 +349,25 @@ extern "C" {
      f_string_length i1 = 0;
      f_string_length i2 = 0;
  
-    f_string_length stop1 = string1.used;
-    f_string_length stop2 = string2.used;
+    for (; i1 < string1.used && i2 < string2.used; i1++, i2++) {
+      // skip past NULL characters in string1.
+      while (i1 < string1.used && string1.string[i1] == f_eos) i1++;
+      if (i1 == string1.used) break;
  
-    for (; i1 < stop1 && i2 < stop2; i1++, i2++) {
-      while (i1 < stop1 && string1.string[i1] == f_eos) i1++;
-      while (i2 < stop2 && string2.string[i2] == f_eos) i2++;
+      // skip past NULL characters in string2.
+      while (i2 < string2.used && string2.string[i2] == f_eos) i2++;
+      if (i2 == string2.used) break;
  
        if (string1.string[i1] != string2.string[i2]) return f_not_equal_to;
      } // for
  
-    while (i1 < stop1) {
+    // only return f_equal_to if all remaining characters are NULL.
+    while (i1 < string1.used) {
        if (string1.string[i1] != f_eos) return f_not_equal_to;
        i1++;
      } // while
  
-    while (i2 < stop2) {
+    while (i2 < string2.used) {
        if (string2.string[i2] != f_eos) return f_not_equal_to;
        i2++;
      } // while
@@ -174,7 +377,7 @@ extern "C" {
  #endif // _di_fl_compare_dynamic_strings_
  
  #ifndef _di_fl_compare_partial_dynamic_strings_
-  f_return_status fl_compare_partial_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2) {
+  f_return_status fl_compare_dynamic_strings_partial(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2) {
      #ifndef _di_level_1_parameter_checking_
        if (string1.used <= 0) return f_error_set_error(f_invalid_parameter);
        if (string2.used <= 0) return f_error_set_error(f_invalid_parameter);
@@ -189,16 +392,22 @@ extern "C" {
      f_string_length i1 = offset1.start;
      f_string_length i2 = offset2.start;
  
-    f_string_length stop1 = offset1.stop + 1;
-    f_string_length stop2 = offset2.stop + 1;
+    const f_string_length stop1 = offset1.stop + 1;
+    const f_string_length stop2 = offset2.stop + 1;
  
      for (; i1 < stop1 && i2 < stop2; i1++, i2++) {
+      // skip past NULL characters in string1.
        while (i1 < stop1 && string1.string[i1] == f_eos) i1++;
+      if (i1 == stop1) break;
+
+      // skip past NULL characters in string2.
        while (i2 < stop2 && string2.string[i2] == f_eos) i2++;
+      if (i2 == stop2) break;
  
        if (string1.string[i1] != string2.string[i2]) return f_not_equal_to;
      } // for
  
+    // only return f_equal_to if all remaining characters are NULL.
      while (i1 < stop1) {
        if (string1.string[i1] != f_eos) return f_not_equal_to;
        i1++;
diff --git a/level_1/fl_strings/c/strings.h b/level_1/fl_strings/c/strings.h

index a8027da29e76c2af65e9513f74a5d4e1394787bb..5b4c1979a7e29f703516adaa378cb9b45b3823ad 100644 (file)
--- a/level_1/fl_strings/c/strings.h
+++ b/level_1/fl_strings/c/strings.h
@@ -25,61 +25,254 @@
  extern "C" {
  #endif
  
+/**
+ * Allocate a new string from the provided range in the buffer.
+ *
+ * @param buffer
+ *   The buffer to rip from.
+ * @param location
+ *   A range within the buffer representing the string to rip.
+ * @param result
+ *   The new string, which will be allocated or reallocated as necessary.
+ *
+ * @return
+ *   f_none on success.
+ *   f_no_data if nothing to rip, no allocations or reallocations are performed.
+ *   f_incomplete_utf_on_eos if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_allocation_error (with error bit) on memory allocation error.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
  #ifndef _di_fl_rip_string_
-  /**
-   * given a start and stop position, this will return a new string based from the supplied buffer, based on the passed positions.
-   * this will replace/overwrite existing information inside of the results variable.
-   */
-  extern f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location position, f_dynamic_string *results);
+  extern f_return_status fl_rip_string(const f_dynamic_string buffer, const f_string_location location, f_dynamic_string *result);
  #endif // _di_fl_rip_string_
  
-#ifndef _di_fl_seek_line_past_non_graph_
-  /**
-   * given a dynamic string and a string location, seek past all non-graph characters until a graph is reached.
-   * will ignore the given placeholder.
-   */
-  extern f_return_status fl_seek_line_past_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder);
-#endif // _di_fl_seek_line_past_non_graph_
+/**
+ * Increment buffer location until a graph character (including UTF-8) or an EOL is matched.
+ *
+ * This will ignore the UTF-8 BOM.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ * @param placeholder
+ *   A single-width character representing a placeholder to ignore (may be NULL).
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_allocation_error (with error bit) on memory allocation error.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
+#ifndef _di_fl_seek_line_until_graph_
+  extern f_return_status fl_seek_line_until_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder);
+#endif // _di_fl_seek_line_until_graph_
  
+/**
+ * Increment buffer location until a non-graph character (including UTF-8) or an EOL is matched.
+ *
+ * This will ignore the UTF-8 BOM.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ * @param placeholder
+ *   A single-width character representing a placeholder to ignore (may be NULL).
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_allocation_error (with error bit) on memory allocation error.
+ *   f_reallocation_error (with error bit) on memory reallocation error.
+ */
  #ifndef _di_fl_seek_line_until_non_graph_
-  /**
-   * given a dynamic string and a string location, seek past all graph characters until a non-graph is reached.
-   * will ignore the given placeholder.
-   */
-  extern f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *position, const char placeholder);
+  extern f_return_status fl_seek_line_until_non_graph(const f_dynamic_string buffer, f_string_location *location, const char placeholder);
  #endif // _di_fl_seek_line_until_non_graph_
  
+/**
+ * Seek the buffer location forward until the character (1-byte wide) or EOL is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A single-width character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_seek_line_to_character()
+ */
+#ifndef _di_fl_seek_line_to_
+  extern f_return_status fl_seek_line_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this);
+#endif // _di_fl_seek_line_to_
+
+/**
+ * Seek the buffer location forward until the character (up to 4-byte wide) or EOL is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A 1-width, 2-width, 3-width, or 4-width character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eol on success, but stopped at EOL.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_seek_line_to()
+ */
+#ifndef _di_fl_seek_line_to_character_
+  extern f_return_status fl_seek_line_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_seek_line_to_character_
+
+/**
+ * Seek the buffer location forward until the character (1-byte wide) is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A single-width character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_none_on_stop on success, but stopped at the stop location.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_seek_to_character()
+ */
  #ifndef _di_fl_seek_to_
-  /**
-   * given a dynamic string and a string location, seek past all characters until the given character is reached.
-   */
-  extern f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *position, const char seek_to_this);
+  extern f_return_status fl_seek_to(const f_dynamic_string buffer, f_string_location *location, const char seek_to_this);
  #endif // _di_fl_seek_to_
  
+/**
+ * Seek the buffer location forward until the character (up to 4-byte wide) is reached.
+ *
+ * @param buffer
+ *   The buffer to traverse.
+ * @param location
+ *   A range within the buffer representing the start and stop locations.
+ *   The start location will be incremented by seek.
+ * @param seek_to_this
+ *   A 1-width, 2-width, 3-width, or 4-width character representing a character to seek to.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eos on success, but stopped at end of buffer.
+ *   f_incomplete_utf_on_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   f_incomplete_utf_on_eos (with error bit) if end of string is reached before a complete UTF-8 character can be processed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_seek_to()
+ */
+#ifndef _di_fl_seek_to_character_
+  extern f_return_status fl_seek_to_character(const f_dynamic_string buffer, f_string_location *location, const f_utf_character seek_to_this);
+#endif // _di_fl_seek_to_character_
+
+/**
+ * Compare two strings, similar to strncmp().
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare.
+ * @param string2
+ *   String to compare.
+ * @param length1
+ *   Length of string1.
+ * @param length2
+ *   Length of string2.
+ *
+ * @return
+ *   f_equal_to when both strings are equal.
+ *   f_not_equal_to when the strings are not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_compare_dynamic_strings()
+ * @see: fl_compare_dynamic_strings_partial()
+ */
  #ifndef _di_fl_compare_strings_
-  /**
-   * this compares two strings and works similar to that of strncmp(..) but has significant differences to strncmp(..).
-   * given two strings, this will return either f_equal_to or f_not_equal_to.
-   * this does not stop on f_eos and f_eos will be ignored as if it were not taking up any space, therefor a 5 character string could return f_equal_to if the 5 character string contains an f_eos anywhere within it.
-   */
    extern f_return_status fl_compare_strings(const f_string string1, const f_string string2, const f_string_length length1, const f_string_length length2);
  #endif // _di_fl_compare_strings_
  
+/**
+ * Compare two strings, similar to strncmp().
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare.
+ * @param string2
+ *   String to compare.
+ *
+ * @return
+ *   f_equal_to when both strings are equal.
+ *   f_not_equal_to when the strings are not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_compare_strings()
+ * @see: fl_compare_dynamic_strings_partial()
+ */
  #ifndef _di_fl_compare_dynamic_strings_
-  /**
-   * this compares two dynamic strings and works similar to that of strncmp(..) but has significant differences to strncmp(..).
-   * given two strings, this will return either f_equal_to or f_not_equal_to.
-   * this is far safer than fl_compare_strings(..) as dynamic string contain size information within them.
-   * this does not stop on f_eos and f_eos will be ignored as if it were not taking up any space, therefor a 5 character string could return f_equal_to if the 5 character string contains an f_eos anywhere within it.
-   */
    extern f_return_status fl_compare_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2);
  #endif // _di_fl_compare_dynamic_strings_
  
+/**
+ * Compare two strings, similar to strncmp(), but restricted to the given ranges.
+ *
+ * This does not stop on NULL.
+ * NULL characters are ignored.
+ *
+ * @param string1
+ *   String to compare.
+ * @param string2
+ *   String to compare.
+ * @param offset1
+ *   A range within the string1 to restrict the comparison to.
+ * @param offset2
+ *   A range within the string2 to restrict the comparison to.
+ *
+ * @return
+ *   f_equal_to when both strings are equal.
+ *   f_not_equal_to when the strings are not equal.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *
+ * @see: fl_compare_strings()
+ * @see: fl_compare_dynamic_strings()
+ */
  #ifndef _di_fl_compare_partial_dynamic_strings_
-  /**
-   * this functions identical to fl_compare_dynamic_strings, but uses offsets for both strings.
-   */
-  extern f_return_status fl_compare_partial_dynamic_strings(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2);
+  extern f_return_status fl_compare_dynamic_strings_partial(const f_dynamic_string string1, const f_dynamic_string string2, const f_string_location offset1, const f_string_location offset2);
  #endif // _di_fl_compare_partial_dynamic_strings_
  
  #ifdef __cplusplus
diff --git a/level_3/firewall/c/private-firewall.c b/level_3/firewall/c/private-firewall.c

index b104a1bc900e57579301fbe4710ee505d1bdeea0..34aaf0161f5b19393972675788ad5e80f75755ff 100644 (file)
--- a/level_3/firewall/c/private-firewall.c
+++ b/level_3/firewall/c/private-firewall.c
@@ -919,7 +919,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved
      location.stop = firewall_group_main_length - 1;
      fixed_string.string = firewall_group_main;
      fixed_string.used = firewall_group_main_length;
-    if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
+    if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
        new_chain = f_false;
        reserved->has_main = f_true;
        reserved->main_at = i;
@@ -930,7 +930,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved
      location.stop = firewall_group_stop_length - 1;
      fixed_string.string = firewall_group_stop;
      fixed_string.used = firewall_group_stop_length;
-    if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
+    if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
        new_chain = f_false;
        reserved->has_stop = f_true;
        reserved->stop_at = i;
@@ -941,7 +941,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved
      location.stop = firewall_group_lock_length - 1;
      fixed_string.string = firewall_group_lock;
      fixed_string.used = firewall_group_lock_length;
-    if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
+    if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
        new_chain = f_false;
        reserved->has_lock = f_true;
        reserved->lock_at = i;
@@ -952,7 +952,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved
      location.stop = firewall_group_lock_length - 1;
      fixed_string.string = firewall_chain_none;
      fixed_string.used = firewall_chain_none_length;
-    if (fl_compare_partial_dynamic_strings(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
+    if (fl_compare_dynamic_strings_partial(local->buffer, fixed_string, local->chain_objects.array[i], location) == f_equal_to) {
        new_chain = f_false;
      }
  
@@ -963,7 +963,7 @@ f_return_status firewall_create_custom_chains(firewall_reserved_chains *reserved
          location.start = 0;
          location.stop = data->chains.array[j].used - 1;
  
-        if (fl_compare_partial_dynamic_strings(local->buffer, data->chains.array[j], local->chain_objects.array[i], location) == f_equal_to) {
+        if (fl_compare_dynamic_strings_partial(local->buffer, data->chains.array[j], local->chain_objects.array[i], location) == f_equal_to) {
            new_chain = f_false;
            local->chain_ids.array[i] = j;
  
diff --git a/level_3/firewall/data/build/dependencies b/level_3/firewall/data/build/dependencies

index 55693f49feeb67168b0df22b5e54acf344e14c84..4b4f94dc572adab9d0b51337b6e974fd929ecc5f 100644 (file)
--- a/level_3/firewall/data/build/dependencies
+++ b/level_3/firewall/data/build/dependencies
@@ -6,6 +6,7 @@ f_pipe
  f_print
  f_strings
  f_types
+f_utf
  fl_colors
  fl_console
  fl_directory
diff --git a/level_3/init/c/init.c b/level_3/init/c/init.c

index 960402545da059508d81818b66f289abf866d0d6..5b7d7d5282e20c628b85e7aee2ab80d08e03eacd 100644 (file)
--- a/level_3/init/c/init.c
+++ b/level_3/init/c/init.c
@@ -217,7 +217,7 @@ extern "C" {
  
  
        // sit and wait for signals.
-      while (1) {
+      for (;;) {
          signal_result = sigwaitinfo(&signal_mask, &signal_information);
  
          if (signal_result < 0) {
@@ -266,7 +266,7 @@ extern "C" {
  
          memset(&signal_information, 0, sizeof(siginfo_t));
          continue;
-      }
+      } // for
      }
  
      init_delete_argument((*argument));
diff --git a/level_3/init/data/build/dependencies b/level_3/init/data/build/dependencies

index 55693f49feeb67168b0df22b5e54acf344e14c84..4b4f94dc572adab9d0b51337b6e974fd929ecc5f 100644 (file)
--- a/level_3/init/data/build/dependencies
+++ b/level_3/init/data/build/dependencies
@@ -6,6 +6,7 @@ f_pipe
  f_print
  f_strings
  f_types
+f_utf
  fl_colors
  fl_console
  fl_directory
author	Kevin Day <thekevinday@gmail.com>
	Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Wed, 28 Aug 2019 04:59:00 +0000 (23:59 -0500)
level_0/f_serialized/c/serialized.h		patch \| blob \| history
level_0/f_utf/c/utf.c		patch \| blob \| history
level_0/f_utf/c/utf.h		patch \| blob \| history
level_1/fl_fss/c/fss.c		patch \| blob \| history
level_1/fl_fss/c/fss.h		patch \| blob \| history
level_1/fl_fss/c/fss_basic.c		patch \| blob \| history
level_1/fl_serialized/c/serialized.c		patch \| blob \| history
level_1/fl_serialized/c/serialized.h		patch \| blob \| history
level_1/fl_serialized/data/build/dependencies		patch \| blob \| history
level_1/fl_serialized/data/build/settings		patch \| blob \| history
level_1/fl_strings/c/strings.c		patch \| blob \| history
level_1/fl_strings/c/strings.h		patch \| blob \| history
level_3/firewall/c/private-firewall.c		patch \| blob \| history
level_3/firewall/data/build/dependencies		patch \| blob \| history
level_3/init/c/init.c		patch \| blob \| history
level_3/init/data/build/dependencies		patch \| blob \| history