Feature: Provide and implement f_fll_id_t and fl_string_fll_identify().

author Kevin Day <thekevinday@gmail.com>

Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)

committer Kevin Day <thekevinday@gmail.com>

Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)
author Kevin Day <thekevinday@gmail.com>
Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)
committer Kevin Day <thekevinday@gmail.com>
Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)
diff --git a/level_0/f_type/c/type.h b/level_0/f_type/c/type.h

index 2b514c4210eb6b17d1314706e2e1d7ab795257a1..4a322aecda3a993b0e5482f5236f8f1cfc9531a0 100644 (file)
--- a/level_0/f_type/c/type.h
+++ b/level_0/f_type/c/type.h
@@ -1041,6 +1041,116 @@ extern "C" {
    #define f_statess_t_initialize { 0, 0, 0 }
  #endif // _di_f_type_statess_t_
  
+/**
+ * An FLL Identifier represents a way to identify FLL related text files and possibly binary files.
+ *
+ * This was originally designed for FSS (Featureless Settings Specification) but has been expanded to be used by the entire project.
+ *
+ * An FLL Identifier consists of two parts:
+ * 1) A string of up to 64 bytes representing a machine-name for the type.
+ * 2) A 16-bit number representing 2^16 possible type classifications, which is represented by 4 hexadecimal 1-byte characters for text.
+ *
+ * For example "fss-1234" is a valid type such that:
+ * 1) name: "fss".
+ * 2) type: "1234" (or 0x1234 in binary).
+ *
+ * UTF-8 codes are available, but one must consider that a single 4-byte character still takes up 4 bytes.
+ * Therefore for all 4-byte UTF-8 characters, there is a maximum of 16 characters available.
+ *
+ * This intentionally utilizes a fixed array to avoid the need of dynamic allocation.
+ *
+ * The macro_f_fll_id_t_clear() is provided for "consistency in design" reasons.
+ * However, it is probably a better idea to use memset to clear the name array (the macro expands to multiple statements and must not be the unbraced body of a conditional or loop).
+ *
+ * The name must only be "word" characters (therefore "-" is not allowed).
+ *
+ * name: The string representing the name of the FLL Identifier (This is not a NULL terminated string).
+ * type: The code representing the type of the Identifier.
+ * used: A representation of how many bytes in name are in use (if 0, then there is no name, thus this can be seen as not a valid identifier).
+ */
+#ifndef _di_f_fll_id_t_
+  typedef struct {
+    char name[64];
+
+    uint16_t type;
+    uint8_t used;
+  } f_fll_id_t;
+
+  #define f_fll_id_t_initialize { \
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \
+    0, \
+    0, \
+  }
+
+  #define macro_f_fll_id_t_clear(id) \
+    (id).name[0] = 0; \
+    (id).name[1] = 0; \
+    (id).name[2] = 0; \
+    (id).name[3] = 0; \
+    (id).name[4] = 0; \
+    (id).name[5] = 0; \
+    (id).name[6] = 0; \
+    (id).name[7] = 0; \
+    (id).name[8] = 0; \
+    (id).name[9] = 0; \
+    (id).name[10] = 0; \
+    (id).name[11] = 0; \
+    (id).name[12] = 0; \
+    (id).name[13] = 0; \
+    (id).name[14] = 0; \
+    (id).name[15] = 0; \
+    (id).name[16] = 0; \
+    (id).name[17] = 0; \
+    (id).name[18] = 0; \
+    (id).name[19] = 0; \
+    (id).name[20] = 0; \
+    (id).name[21] = 0; \
+    (id).name[22] = 0; \
+    (id).name[23] = 0; \
+    (id).name[24] = 0; \
+    (id).name[25] = 0; \
+    (id).name[26] = 0; \
+    (id).name[27] = 0; \
+    (id).name[28] = 0; \
+    (id).name[29] = 0; \
+    (id).name[30] = 0; \
+    (id).name[31] = 0; \
+    (id).name[32] = 0; \
+    (id).name[33] = 0; \
+    (id).name[34] = 0; \
+    (id).name[35] = 0; \
+    (id).name[36] = 0; \
+    (id).name[37] = 0; \
+    (id).name[38] = 0; \
+    (id).name[39] = 0; \
+    (id).name[40] = 0; \
+    (id).name[41] = 0; \
+    (id).name[42] = 0; \
+    (id).name[43] = 0; \
+    (id).name[44] = 0; \
+    (id).name[45] = 0; \
+    (id).name[46] = 0; \
+    (id).name[47] = 0; \
+    (id).name[48] = 0; \
+    (id).name[49] = 0; \
+    (id).name[50] = 0; \
+    (id).name[51] = 0; \
+    (id).name[52] = 0; \
+    (id).name[53] = 0; \
+    (id).name[54] = 0; \
+    (id).name[55] = 0; \
+    (id).name[56] = 0; \
+    (id).name[57] = 0; \
+    (id).name[58] = 0; \
+    (id).name[59] = 0; \
+    (id).name[60] = 0; \
+    (id).name[61] = 0; \
+    (id).name[62] = 0; \
+    (id).name[63] = 0; \
+    (id).type = 0; \
+    (id).used = 0;
+#endif // _di_f_fll_id_t_
+
  #ifdef __cplusplus
  } // extern "C"
  #endif
diff --git a/level_1/fl_string/c/string.c b/level_1/fl_string/c/string.c

index 6518717cd5d34a93e59f6f091cf207bf64417359..b1a8263fd4d656b31d43f00ef521cf273b024561 100644 (file)
--- a/level_1/fl_string/c/string.c
+++ b/level_1/fl_string/c/string.c
@@ -546,6 +546,168 @@ extern "C" {
    }
  #endif // _di_fl_string_dynamic_seek_to_utf_character_
  
+#ifndef _di_fl_string_fll_identify_
+  // Identify an FLL Identifier ("name-XXXX") within the buffer, optionally storing the name and type code into id.
+  // Returns F_true when valid, F_false when not, or an error status passed up from the UTF-8 processing functions.
+  // Leading whitespace is skipped, NULL characters are ignored, and id (when provided) is only populated on F_true.
+  // See the header file for the complete documentation.
+  f_status_t fl_string_fll_identify(const f_string_t buffer, const f_array_length_t length, f_fll_id_t *id) {
+    #ifndef _di_level_1_parameter_checking_
+      if (!length) return F_status_set_error(F_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    // Always reset the codes so that a failed identification never leaves stale values behind.
+    if (id) {
+      id->type = 0;
+      id->used = 0;
+    }
+
+    // The FLL identifier is always at least 6 characters ("X-0000").
+    if (length < 6) {
+      return F_false;
+    }
+
+    f_status_t status = F_none;
+    f_array_length_t i = 0;
+    f_array_length_t remaining = length;
+    f_array_length_t width = 0;
+
+    // Skip past any leading whitespace, ignoring NULL characters.
+    for (; i < length; ) {
+
+      status = f_utf_is_whitespace(buffer + i, remaining);
+
+      if (F_status_is_error(status)) {
+        if (F_status_set_fine(status) == F_maybe) {
+          return F_status_set_error(F_complete_not_utf);
+        }
+
+        return status;
+      }
+
+      if (status == F_false) {
+        if (buffer[i] == 0) {
+          ++i;
+          --remaining;
+          continue;
+        }
+
+        break;
+      }
+
+      // Determine the width once, before advancing, so that both counters move by the same character.
+      width = macro_f_utf_byte_width(buffer[i]);
+      i += width;
+      remaining -= width;
+    } // for
+
+    if (remaining < 6) {
+      return F_false;
+    }
+
+    const f_array_length_t name_start = i;
+    f_array_length_t name_used = 0;
+
+    // Walk the name, which is made of word characters only, ignoring NULL characters.
+    for (; i < length; ) {
+
+      status = f_utf_is_word(buffer + i, remaining, F_true);
+      if (F_status_is_error(status)) return status;
+
+      if (status == F_false) {
+        if (buffer[i] == 0) {
+          ++i;
+          --remaining;
+          continue;
+        }
+
+        break;
+      }
+
+      width = macro_f_utf_byte_width(buffer[i]);
+      name_used += width;
+
+      // The name is stored in a fixed size array, anything longer cannot be a valid FLL Identifier.
+      if (name_used > sizeof(id->name)) {
+        return F_false;
+      }
+
+      i += width;
+      remaining -= width;
+    } // for
+
+    // A name is required and it must be followed by the separator (the range is checked before the byte is read).
+    if (!name_used || i >= length || buffer[i] != f_string_ascii_minus[0]) {
+      return F_false;
+    }
+
+    // Step over the separator so that the type code starts at the first digit.
+    ++i;
+    --remaining;
+
+    uint16_t type = 0;
+
+    {
+      uint8_t digits = 0;
+
+      for (; i < length; ++i, --remaining) {
+
+        if (buffer[i] == 0) continue;
+
+        // The hexadecimal representing the number may only be ASCII, anything else ends the type code.
+        if (macro_f_utf_byte_width_is(buffer[i]) || !isxdigit((unsigned char) buffer[i])) break;
+
+        // A 16-bit type code has no room for a fifth digit.
+        if (digits == 4) return F_false;
+
+        // Convert exactly one digit at a time (isxdigit() guarantees one of 0-9, a-f, or A-F).
+        type *= 16;
+
+        if (buffer[i] >= 'a') type += 10 + (buffer[i] - 'a');
+        else if (buffer[i] >= 'A') type += 10 + (buffer[i] - 'A');
+        else type += buffer[i] - '0';
+
+        ++digits;
+      } // for
+
+      // The type code is always written as 4 hexadecimal digits.
+      if (digits != 4) return F_false;
+    }
+
+    // The end of line, string, or end of length are the only valid stop points.
+    if (i < length && buffer[i] != f_string_eol_s[0]) {
+      status = f_utf_is_whitespace(buffer + i, remaining);
+      if (F_status_is_error(status)) return status;
+
+      // Anything other than whitespace directly after the type code makes this an invalid FLL Identifier.
+      if (status == F_false) return F_false;
+    }
+
+    if (id) {
+      f_array_length_t used = 0;
+
+      for (i = name_start; i < length && used < sizeof(id->name); ++i) {
+
+        if (buffer[i] == 0) continue;
+        if (buffer[i] == f_string_ascii_minus[0]) break;
+
+        id->name[used] = buffer[i];
+        ++used;
+      } // for
+
+      // The name is only NULL terminated when there is room for it.
+      if (used < sizeof(id->name)) {
+        id->name[used] = 0;
+      }
+
+      id->type = type;
+      id->used = used;
+    }
+
+    return F_true;
+  }
+#endif // _di_fl_string_fll_identify_
+
  #ifndef _di_fl_string_rip_
    f_status_t fl_string_rip(const f_string_t source, const f_array_length_t length, f_string_dynamic_t *destination) {
      #ifndef _di_level_1_parameter_checking_
diff --git a/level_1/fl_string/c/string.h b/level_1/fl_string/c/string.h

index 41e9b4bce68552d418a1556ebd83baa3b9c1cd25..60a5f8c5fe5161cb7b5bf7ac9e2f47df7c4c8e9d 100644 (file)
--- a/level_1/fl_string/c/string.h
+++ b/level_1/fl_string/c/string.h
@@ -919,6 +919,43 @@ extern "C" {
  #endif // _di_fl_string_dynamic_seek_to_utf_character_
  
  /**
+ * Identify whether or not the given string represents a single valid FLL Identifier and possibly save the FLL Identifier.
+ *
+ * When an FLL Identifier is provided, its used and type codes are always reset, even if no valid FLL Identifier is found.
+ *
+ * Whitespace may be before and after the FLL Identifier and will be ignored.
+ * Any other leading character is treated as the start of a possible FLL Identifier.
+ *
+ * A valid FLL Identifier must terminate on either whitespace, EOL, or the stop point (length).
+ *
+ * @param buffer
+ *   The string to process (NULL characters within the processed range are skipped).
+ * @param length
+ *   The number of bytes within the buffer to process.
+ *   Must be greater than 0.
+ * @param id
+ *   (optional) The FLL Identifier found (the name is only NULL terminated when it does not fill the entire name array).
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_false if the buffer does not represent a valid FLL Identifier.
+ *   F_true if the buffer does represent a valid FLL Identifier.
+ *
+ *   F_complete_not_utf (with error bit) if a character is an incomplete UTF-8 fragment.
+ *   F_maybe (with error bit) if a character could be a whitespace but width is not long enough. (This is only returned for an otherwise valid FLL Identifier.)
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isxdigit()
+ *
+ * @see f_utf_is_whitespace()
+ * @see f_utf_is_word()
+ */
+#ifndef _di_fl_string_fll_identify_
+  extern f_status_t fl_string_fll_identify(const f_string_t buffer, const f_array_length_t length, f_fll_id_t *id);
+#endif // _di_fl_string_fll_identify_
+
+/**
   * Allocate a new string from the provided range in the string.
   *
   * Ignores leading and trailing whitespace.
author	Kevin Day <thekevinday@gmail.com>
	Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Sat, 29 May 2021 23:00:02 +0000 (18:00 -0500)
level_0/f_type/c/type.h		patch \| blob \| history
level_1/fl_string/c/string.c		patch \| blob \| history
level_1/fl_string/c/string.h		patch \| blob \| history