From a89cb6c0625eaa81770b27ce245adc1a40c9f71e Mon Sep 17 00:00:00 2001
From: Kevin Day
Date: Sat, 29 May 2021 18:00:02 -0500
Subject: [PATCH] Feature: Provide and implement f_fll_id_t and fl_string_fll_identify().

The Featureless Settings Specification identifiers are a decent format.
Make a more generalized structure for processing these for the entire project and not just FSS.
---
 level_0/f_type/c/type.h      | 110 +++++++++++++++++++++++++++++
 level_1/fl_string/c/string.c | 168 +++++++++++++++++++++++++++++++++++++++++++
 level_1/fl_string/c/string.h |  44 +++++++++++
 3 files changed, 322 insertions(+)

diff --git a/level_0/f_type/c/type.h b/level_0/f_type/c/type.h
index 2b514c4..4a322ae 100644
--- a/level_0/f_type/c/type.h
+++ b/level_0/f_type/c/type.h
@@ -1041,6 +1041,116 @@ extern "C" {
   #define f_statess_t_initialize { 0, 0, 0 }
 #endif // _di_f_type_statess_t_
 
+/**
+ * An FLL Identifier represents a way to identify FLL related text files and possibly binary files.
+ *
+ * This was originally designed for FSS (Featureless Settings Specification) but has been expanded to be used by the entire project.
+ *
+ * An FLL Identifier consists of two parts:
+ *   1) A string of up to 64 bytes representing a machine-name for the type.
+ *   2) A 16-bit number representing 2^16 possible type classifications, which is represented by 4 hexadecimal 1-byte characters for text.
+ *
+ * For example "fss-1234" is a valid type such that:
+ *   1) name: "fss".
+ *   2) type: "1234" (or 0x1234 as a number).
+ *
+ * UTF-8 codes are available, but one must consider that one 4-byte character still takes up 4 bytes.
+ * Therefore for all 4-byte UTF-8 characters, there is a maximum of 16 characters available.
+ *
+ * This intentionally utilizes a fixed array to avoid the need of dynamic allocation.
+ *
+ * The macro_f_fll_id_t_clear() is provided for "consistency in design" reasons.
+ * However, it is probably a better idea to use memset to clear the name array.
+ *
+ * The name must only be "word" characters (therefore "-" is not allowed).
+ *
+ * name: The string representing the name of the FLL Identifier (This is not a NULL terminated string).
+ * type: The code representing the type of the Identifier.
+ * used: A representation of how many bytes in name are in use (if 0, then there is no name, thus this can be seen as not a valid identifier).
+ */
+#ifndef _di_f_fll_id_t_
+  typedef struct {
+    char name[64];
+
+    uint16_t type;
+    uint8_t used;
+  } f_fll_id_t;
+
+  #define f_fll_id_t_initialize { \
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, \
+    0, \
+    0, \
+  }
+
+  #define macro_f_fll_id_t_clear(id) \
+    id.name[0] = 0; \
+    id.name[1] = 0; \
+    id.name[2] = 0; \
+    id.name[3] = 0; \
+    id.name[4] = 0; \
+    id.name[5] = 0; \
+    id.name[6] = 0; \
+    id.name[7] = 0; \
+    id.name[8] = 0; \
+    id.name[9] = 0; \
+    id.name[10] = 0; \
+    id.name[11] = 0; \
+    id.name[12] = 0; \
+    id.name[13] = 0; \
+    id.name[14] = 0; \
+    id.name[15] = 0; \
+    id.name[16] = 0; \
+    id.name[17] = 0; \
+    id.name[18] = 0; \
+    id.name[19] = 0; \
+    id.name[20] = 0; \
+    id.name[21] = 0; \
+    id.name[22] = 0; \
+    id.name[23] = 0; \
+    id.name[24] = 0; \
+    id.name[25] = 0; \
+    id.name[26] = 0; \
+    id.name[27] = 0; \
+    id.name[28] = 0; \
+    id.name[29] = 0; \
+    id.name[30] = 0; \
+    id.name[31] = 0; \
+    id.name[32] = 0; \
+    id.name[33] = 0; \
+    id.name[34] = 0; \
+    id.name[35] = 0; \
+    id.name[36] = 0; \
+    id.name[37] = 0; \
+    id.name[38] = 0; \
+    id.name[39] = 0; \
+    id.name[40] = 0; \
+    id.name[41] = 0; \
+    id.name[42] = 0; \
+    id.name[43] = 0; \
+    id.name[44] = 0; \
+    id.name[45] = 0; \
+    id.name[46] = 0; \
+    id.name[47] = 0; \
+    id.name[48] = 0; \
+    id.name[49] = 0; \
+    id.name[50] = 0; \
+    id.name[51] = 0; \
+    id.name[52] = 0; \
+    id.name[53] = 0; \
+    id.name[54] = 0; \
+    id.name[55] = 0; \
+    id.name[56] = 0; \
+    id.name[57] = 0; \
+    id.name[58] = 0; \
+    id.name[59] = 0; \
+    id.name[60] = 0; \
+    id.name[61] = 0; \
+    id.name[62] = 0; \
+    id.name[63] = 0; \
+    id.type = 0; \
+    id.used = 0;
+#endif // _di_f_fll_id_t_
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
diff --git a/level_1/fl_string/c/string.c b/level_1/fl_string/c/string.c
index 6518717..b1a8263 100644
--- a/level_1/fl_string/c/string.c
+++ b/level_1/fl_string/c/string.c
@@ -546,6 +546,174 @@ extern "C" {
   }
 #endif // _di_fl_string_dynamic_seek_to_utf_character_
 
+#ifndef _di_fl_string_fll_identify_
+  f_status_t fl_string_fll_identify(const f_string_t buffer, const f_array_length_t length, f_fll_id_t *id) {
+    #ifndef _di_level_1_parameter_checking_
+      if (!length) return F_status_set_error(F_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    // Always reset the result so that a failed match never leaves stale data behind.
+    if (id) {
+      id->type = 0;
+      id->used = 0;
+    }
+
+    // The FLL Identifier is always at least 6 characters ("X-0000").
+    if (length < 6) {
+      return F_false;
+    }
+
+    f_status_t status = F_none;
+    f_array_length_t i = 0;
+    f_array_length_t remaining = length;
+    f_array_length_t width = 0;
+
+    // Skip past any leading whitespace and NULL placeholders.
+    while (i < length) {
+
+      status = f_utf_is_whitespace(buffer + i, remaining);
+
+      if (F_status_is_error(status)) {
+        if (F_status_set_fine(status) == F_maybe) {
+          return F_status_set_error(F_complete_not_utf);
+        }
+
+        return status;
+      }
+
+      if (status == F_false) {
+        if (buffer[i]) break;
+
+        ++i;
+        --remaining;
+
+        continue;
+      }
+
+      // Read the width once, before advancing, so that both counters move by the same character's width.
+      width = macro_f_utf_byte_width(buffer[i]);
+      if (width > remaining) return F_status_set_error(F_complete_not_utf);
+
+      i += width;
+      remaining -= width;
+    } // while
+
+    if (remaining < 6) {
+      return F_false;
+    }
+
+    // Remember where the name begins so that it can be copied once the whole identifier has been validated.
+    const f_array_length_t start = i;
+    f_array_length_t name_used = 0;
+
+    // Walk the name, which must consist only of "word" characters.
+    while (i < length) {
+
+      status = f_utf_is_word(buffer + i, remaining, F_true);
+      if (F_status_is_error(status)) return status;
+
+      if (status == F_false) {
+        if (buffer[i]) break;
+
+        // NULL placeholders are stepped over and are not part of the name.
+        ++i;
+        --remaining;
+
+        continue;
+      }
+
+      width = macro_f_utf_byte_width(buffer[i]);
+      if (width > remaining) return F_status_set_error(F_complete_not_utf);
+
+      i += width;
+      remaining -= width;
+      name_used += width;
+    } // while
+
+    // A valid identifier has a non-empty name that fits within the fixed-size name array.
+    if (!name_used || name_used > sizeof(id->name)) {
+      return F_false;
+    }
+
+    // The name must be immediately followed by the minus separator.
+    if (i >= length || buffer[i] != f_string_ascii_minus[0]) {
+      return F_false;
+    }
+
+    // Step over the separator so that parsing starts on the first hexadecimal digit.
+    ++i;
+    --remaining;
+
+    uint16_t type = 0;
+
+    {
+      f_array_length_t digits = 0;
+
+      // The buffer is not NULL terminated, so each digit is converted alone through a terminated scratch string.
+      char digit[2] = { 0, 0 };
+
+      for (; i < length && digits < 4; ++i, --remaining) {
+
+        if (!buffer[i]) continue;
+
+        // The hexadecimal representing the number may only be ASCII.
+        if (macro_f_utf_byte_width_is(buffer[i]) || !isxdigit((unsigned char) buffer[i])) {
+          return F_false;
+        }
+
+        digit[0] = buffer[i];
+        type = (uint16_t) ((type << 4) | (uint16_t) strtol(digit, 0, 16));
+        ++digits;
+      } // for
+
+      // The type code is made of exactly 4 hexadecimal digits.
+      if (digits != 4) {
+        return F_false;
+      }
+    }
+
+    // NULL placeholders following the digits are stepped over before looking at the stop point.
+    while (i < length && !buffer[i]) {
+      ++i;
+      --remaining;
+    } // while
+
+    // The end of line, whitespace, or end of length are the only valid stop points.
+    if (i < length && buffer[i] != f_string_eol_s[0]) {
+      status = f_utf_is_whitespace(buffer + i, remaining);
+      if (F_status_is_error(status)) return status;
+
+      if (status == F_false) {
+        return F_false;
+      }
+    }
+
+    if (id) {
+      f_array_length_t used = 0;
+
+      // Copy the name without NULL placeholders, never writing past the end of the name array.
+      for (i = start; i < length && used < sizeof(id->name); ++i) {
+
+        if (!buffer[i]) continue;
+        if (buffer[i] == f_string_ascii_minus[0]) break;
+
+        id->name[used] = buffer[i];
+        ++used;
+      } // for
+
+      // The name is only NULL terminated when there is room left for the terminator.
+      if (used < sizeof(id->name)) {
+        id->name[used] = 0;
+      }
+
+      id->type = type;
+      id->used = (uint8_t) used;
+    }
+
+    return F_true;
+  }
+#endif // _di_fl_string_fll_identify_
+
 #ifndef _di_fl_string_rip_
   f_status_t fl_string_rip(const f_string_t source, const f_array_length_t length, f_string_dynamic_t *destination) {
     #ifndef _di_level_1_parameter_checking_
diff --git a/level_1/fl_string/c/string.h b/level_1/fl_string/c/string.h
index 41e9b4b..60a5f8c 100644
--- a/level_1/fl_string/c/string.h
+++ b/level_1/fl_string/c/string.h
@@ -919,6 +919,50 @@ extern "C" {
 #endif // _di_fl_string_dynamic_seek_to_utf_character_
 
 /**
+ * Identify whether or not the given string represents a single valid FLL Identifier and possibly save the FLL Identifier.
+ *
+ * This will always change the FLL Identifier used and type codes, if a FLL Identifier is provided.
+ * Both are reset to 0 unless the buffer represents a valid FLL Identifier.
+ *
+ * Whitespace may be before and after the FLL Identifier and will be ignored.
+ * Anything else will result in treating the character as a possible FLL Identifier.
+ * NULL characters within the buffer are treated as placeholders and are skipped.
+ * Nothing beyond the first character following the FLL Identifier is inspected.
+ *
+ * A valid FLL Identifier is a name of 1 to 64 bytes of "word" characters, a single minus, and exactly 4 hexadecimal digits.
+ * A valid FLL Identifier must terminate on either whitespace, EOL, or the stop point (length).
+ *
+ * @param buffer
+ *   The string to process.
+ *   This does not need to be NULL terminated.
+ * @param length
+ *   The number of bytes within the buffer to process.
+ *   Must be greater than 0.
+ * @param id
+ *   (optional) The FLL Identifier found.
+ *   The name is only NULL terminated when it is shorter than 64 bytes.
+ *   Set to NULL to not use.
+ *
+ * @return
+ *   F_false if the buffer does not represent a valid FLL Identifier.
+ *   F_true if the buffer does represent a valid FLL Identifier.
+ *
+ *   F_complete_not_utf (with error bit) if a character is an incomplete UTF-8 fragment.
+ *   F_maybe (with error bit) if a character could be a whitespace but width is not long enough. (This is only returned for an otherwise valid FLL Identifier.)
+ *   F_parameter (with error bit) if a parameter is invalid.
+ *   F_utf (with error bit) if character is an invalid UTF-8 character.
+ *
+ * @see isxdigit()
+ * @see strtol()
+ *
+ * @see f_utf_is_whitespace()
+ * @see f_utf_is_word()
+ */
+#ifndef _di_fl_string_fll_identify_
+  extern f_status_t fl_string_fll_identify(const f_string_t buffer, const f_array_length_t length, f_fll_id_t *id);
+#endif // _di_fl_string_fll_identify_
+
+/**
  * Allocate a new string from the provided range in the string.
  *
  * Ignores leading and trailing whitespace.
-- 
1.8.3.1