From 91560a67126ad8df0ee8bbd1c76769fa01f673a4 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Wed, 24 Jan 2024 22:35:59 -0600 Subject: [PATCH] Bugfix: FSS Extended read fails to handle non-terminated quote as per specification. This is a follow up to the problems discovered while writing unit tests and referenced in this commit: 70cbbe34121dc5679961df711e70724f13104489. Given the line: a " b c d. The following Content should now be returned: [0] = " [1] = b [2] = c [3] = d. This adds a new FSS state flag 'f_fss_state_quote_not_e' to give the caller the ability to manually designate that the quotes are being disabled. Currently only the FSS Extended utilizes this flag. Refactor the private function private_fl_fss_basic_read() into private_fl_fss_basic_or_extended_read() to make it more explicitly clear that it provides functionality fo both FSS Basic and FSS Extended. This changes to the code such that when an unterminated quote is detected then the calling function will set the disable quote flag and then call the function again. The relevant FSS Extended unit tests are updated. --- level_0/f_fss/c/fss/common.h | 6 ++- level_1/fl_fss/c/fss/basic.c | 2 +- level_1/fl_fss/c/fss/extended.c | 17 +++++-- level_1/fl_fss/c/fss/extended.h | 4 ++ level_1/fl_fss/c/private-fss.c | 47 ++++++++++--------- level_1/fl_fss/c/private-fss.h | 25 +++++----- .../data/tests/contents/extended-all_read-3.txt | 2 +- .../data/tests/contents/extended-all_read-4.txt | 2 +- .../data/tests/contents/extended-all_read-5.txt | 2 +- .../data/tests/contents/extended-all_read-6.txt | 53 ++++++++++++++++++++++ .../data/tests/contents/extended-all_read-7.txt | 53 ++++++++++++++++++++++ .../tests/unit/c/test-fss-extended_content_read.c | 53 ++++++++++------------ 12 files changed, 195 insertions(+), 71 deletions(-) create mode 100644 level_1/fl_fss/data/tests/contents/extended-all_read-6.txt create mode 100644 level_1/fl_fss/data/tests/contents/extended-all_read-7.txt diff --git a/level_0/f_fss/c/fss/common.h b/level_0/f_fss/c/fss/common.h index 0f046ff..ce3ba3e 100644 --- a/level_0/f_fss/c/fss/common.h +++ b/level_0/f_fss/c/fss/common.h @@ -261,7 +261,7 @@ enum { * * Recommended to be set to at least 4 to be UTF-8 friendlier. * - * F_fss_default_*: + * F_fss_default_*_d: * - block_size_huge: The "huge" size in blocks to process for an FSS related task. * - block_size_normal: The "normal" size in blocks to process for an FSS related task. * - block_size_small: The "small" size in blocks to process for an FSS related task. @@ -285,14 +285,16 @@ enum { * * The f_fss_state_flag_none_e is expected to be 0, therefore it must be safe to use 0 directly. * - * f_fss_state_flag_*: + * f_fss_state_flag_*_e: * - none: No flags are set. * - utf_fail_on_valid_not: Immediately fail on invalid UTF-8 character (including incomplete). + * - quote_not: Disable processing as quoted text (all found quotes are ignored and no quote-escaping is performed). */ #ifndef _di_f_fss_state_flag_e_ enum { f_fss_state_flag_none_e = 0x0, f_fss_state_flag_utf_fail_on_valid_not_e = 0x1, + f_fss_state_quote_not_e = 0x2, }; // enum #endif // _di_f_fss_state_flag_e_ diff --git a/level_1/fl_fss/c/fss/basic.c b/level_1/fl_fss/c/fss/basic.c index d198944..c6a4700 100644 --- a/level_1/fl_fss/c/fss/basic.c +++ b/level_1/fl_fss/c/fss/basic.c @@ -170,7 +170,7 @@ extern "C" { const f_number_unsigned_t delimits_used = delimits->used; - private_fl_fss_basic_read(buffer, F_true, range, found, quote, delimits, state); + private_fl_fss_basic_or_extended_read(buffer, 0x1, range, found, quote, delimits, state); if (state->status == F_status_set_error(F_fss_found_object_content_not)) { diff --git a/level_1/fl_fss/c/fss/extended.c b/level_1/fl_fss/c/fss/extended.c index 224c919..69d20dc 100644 --- a/level_1/fl_fss/c/fss/extended.c +++ b/level_1/fl_fss/c/fss/extended.c @@ -44,7 +44,7 @@ extern "C" { const f_number_unsigned_t quotes_used = quotes ? quotes->used : 0; uint8_t content_found = 0; - uint8_t quote = 0; + uint8_t quote = f_fss_quote_type_none_e; f_status_t status = F_okay; @@ -52,9 +52,18 @@ extern "C" { f_range_t content_partial = f_range_t_initialize; - private_fl_fss_basic_read(buffer, F_false, range, &content_partial, "e, delimits, state); + private_fl_fss_basic_or_extended_read(buffer, (state->flag & f_fss_state_quote_not_e) ? 0x2 : 0x0, range, &content_partial, "e, delimits, state); - if (state->status == F_fss_found_object || F_status_set_fine(state->status) == F_fss_found_object_content_not) { + // Quote is unterminated, retry with quotes disabled as per-specification (this error should not be reported when quotes are disabled). + if (state->status == F_status_set_error(F_fss_found_object_content_not)) { + content_partial.start = 1; + content_partial.stop = 0; + state->status = F_okay; + + private_fl_fss_basic_or_extended_read(buffer, 0x2, range, &content_partial, "e, delimits, state); + } + + if (state->status == F_fss_found_object || state->status == F_fss_found_object_content_not) { status = f_memory_array_increase(state->step_small, sizeof(f_range_t), (void **) &found->array, &found->used, &found->size); // The private function sets the error bit on unterminated quoted Object. @@ -221,7 +230,7 @@ extern "C" { const f_number_unsigned_t delimits_used = delimits->used; - private_fl_fss_basic_read(buffer, F_true, range, found, quote, delimits, state); + private_fl_fss_basic_or_extended_read(buffer, 0x1, range, found, quote, delimits, state); if (state->status == F_status_set_error(F_fss_found_object_content_not)) { diff --git a/level_1/fl_fss/c/fss/extended.h b/level_1/fl_fss/c/fss/extended.h index eabb0da..c15d3e4 100644 --- a/level_1/fl_fss/c/fss/extended.h +++ b/level_1/fl_fss/c/fss/extended.h @@ -45,6 +45,7 @@ extern "C" { * A set of all locations where a valid content was found. * @param quotes * (optional) This will store the quote type representing the character to use (from the f_fss_quote_type_*_e). + * Each index in quotes represents a position within the found array index. * Set to NULL to not use. * @param delimits * A delimits array representing where delimits exist within the buffer. @@ -58,6 +59,9 @@ extern "C" { * Error bit designates an error but must be passed along with F_interrupt. * All other statuses are ignored. * + * The following bits are supported when set on state.flags: + * - f_fss_state_quote_not_e: Explicitly disable quotes, treating quotes and their respective delimits as normal characters. + * * Must not be NULL. * * This alters state.status: diff --git a/level_1/fl_fss/c/private-fss.c b/level_1/fl_fss/c/private-fss.c index 26365c7..6809e6a 100644 --- a/level_1/fl_fss/c/private-fss.c +++ b/level_1/fl_fss/c/private-fss.c @@ -111,7 +111,7 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_list_object_write_) || !defined(_di_fl_fss_extended_list_object_write_) #if !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) - void private_fl_fss_basic_read(const f_string_static_t buffer, const bool object_as, f_range_t * const range, f_range_t * const found, uint8_t * const quote, f_number_unsigneds_t * const delimits, f_state_t * const state) { + void private_fl_fss_basic_or_extended_read(const f_string_static_t buffer, const uint8_t flag, f_range_t * const range, f_range_t * const found, uint8_t * const quote, f_number_unsigneds_t * const delimits, f_state_t * const state) { f_fss_skip_past_space(buffer, range, state); if (F_status_is_error(state->status)) return; @@ -146,7 +146,7 @@ extern "C" { found->start = range->start; // Ignore all comment lines. - if (object_as && buffer.string[range->start] == f_fss_comment_s.string[0]) { + if ((flag & 0x1) && buffer.string[range->start] == f_fss_comment_s.string[0]) { while (buffer.string[range->start] != f_fss_eol_s.string[0]) { @@ -262,7 +262,7 @@ extern "C" { return; } - if (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_grave_s.string[0] || (object_as && buffer.string[range->start] == f_fss_comment_s.string[0])) { + if (!(flag & 0x2) && (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_grave_s.string[0]) || ((flag & 0x1) && buffer.string[range->start] == f_fss_comment_s.string[0])) { // Only the first slash before a quote needs to be escaped (or not) as once there is a slash before a quote, this cannot ever be a quote object. // This simplifies the number of slashes needed. @@ -275,7 +275,7 @@ extern "C" { if (F_status_is_error(state->status)) return; } } - else if (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_grave_s.string[0]) { + else if (!(flag & 0x2) && (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_grave_s.string[0])) { quote_found = buffer.string[range->start]; state->status = f_utf_buffer_increment(buffer, range, 1); @@ -652,25 +652,31 @@ extern "C" { } } else if (buffer.string[range->start] == f_fss_eol_s.string[0]) { + if (flag & 0x1) { - // The quote is incomplete, so treat the entire line as the Object as per the specification (including the quotes). - // The error bit is set to designate that the Object is found in an erroneous state (not having a terminating quote). - if (found->start > begin && range->start > begin) { - found->start -= 1; - found->stop = range->start - 1; + // The quote is incomplete, so treat the entire line as the Object as per the specification (including the quotes). + // The error bit is set to designate that the Object is found in an erroneous state (not having a terminating quote). + if (found->start > begin && range->start > begin) { + found->start -= 1; + found->stop = range->start - 1; + } + else { + found->start = 1; + found->stop = 0; + } + + // Move the start position to after the EOL. + ++range->start; } else { - found->start = 1; - found->stop = 0; - } - state->status = F_status_set_error(F_fss_found_object_content_not); + // The quote is incomplete, do not save and reset range->start so that caller can fix the position and re-run with quotes disabled. + range->start = begin; + } // The delimits cannot be preserved in this case as per specification. delimits->used = delimits_used; - - // Move the start position to after the EOL. - ++range->start; + state->status = F_status_set_error(F_fss_found_object_content_not); return; } @@ -754,7 +760,7 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) #if !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) - void private_fl_fss_basic_write(const bool object_as, const f_string_static_t object, const uint8_t quote, f_range_t * const range, f_string_dynamic_t * const destination, f_state_t * const state, void * const internal) { + void private_fl_fss_basic_write(const uint8_t flag, const f_string_static_t object, const uint8_t quote, f_range_t * const range, f_string_dynamic_t * const destination, f_state_t * const state, void * const internal) { f_fss_skip_past_space(object, range, state); if (F_status_is_error(state->status)) return; @@ -804,7 +810,7 @@ extern "C" { if (object.string[input_start] == quote_char) { quoted_is = F_true; } - else if (object_as && object.string[input_start] == f_fss_comment_s.string[0]) { + else if ((flag & 0x1) && object.string[input_start] == f_fss_comment_s.string[0]) { commented = F_true; } @@ -934,7 +940,7 @@ extern "C" { destination->string[destination->used++] = object.string[range->start + i]; } // for } - else if (object_as && object.string[range->start] == f_fss_comment_s.string[0]) { + else if ((flag & 0x1) && object.string[range->start] == f_fss_comment_s.string[0]) { // Only the first slash needs to be escaped for a comment, and then only if not quote. if (item_first == input_start) { @@ -1145,8 +1151,7 @@ extern "C" { destination->string[used_start] = f_fss_slash_s.string[0]; } - if (range->start > range->stop) state->status = F_okay_stop; - else state->status = F_okay_eos; + state->status = range->start > range->stop ? F_okay_stop : F_okay_eos; } #endif // !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) diff --git a/level_1/fl_fss/c/private-fss.h b/level_1/fl_fss/c/private-fss.h index cc1d8a6..aab37d4 100644 --- a/level_1/fl_fss/c/private-fss.h +++ b/level_1/fl_fss/c/private-fss.h @@ -83,17 +83,17 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_list_object_write_) || !defined(_di_fl_fss_extended_list_object_write_) /** - * Private implementation of fl_fss_basic_object_read(). - * - * Intended to be shared to each of the different implementation variations. + * Provide common processing for Basic and Extended Object and Content read. * * @param buffer * The buffer to read from. - * @param object_as - * If TRUE, then this operate as an Object. - * IF FALSE, then this operates as a Content. + * @param flag + * Bits: + * - 0x0: When 0x1 bit is not set, then operate as a Content (extended). + * - 0x1: Operate as an Object (basic or extended). + * - 0x2: Operate with quoting disabled, treating all quotes and escaped quotes as literal (extended). * - * As Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. + * As an Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. * As Content, this does nothing special in regards to a leading '#'. * @param range * The start/stop location within the buffer to be processed. @@ -156,7 +156,7 @@ extern "C" { * @see fl_fss_extended_content_read() */ #if !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) - extern void private_fl_fss_basic_read(const f_string_static_t buffer, const bool object_as, f_range_t * const range, f_range_t * const found, uint8_t * const quote, f_number_unsigneds_t * const delimits, f_state_t * const state) F_attribute_visibility_internal_d; + extern void private_fl_fss_basic_or_extended_read(const f_string_static_t buffer, const uint8_t flag, f_range_t * const range, f_range_t * const found, uint8_t * const quote, f_number_unsigneds_t * const delimits, f_state_t * const state) F_attribute_visibility_internal_d; #endif // !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) /** @@ -166,9 +166,10 @@ extern "C" { * * Note: this does not attempt to "complete" the object. * - * @param object_as - * If TRUE, then this operate as an Object. - * IF FALSE, then this operates as a Content. + * @param flag + * Bits: + * - 0x0: When 0x1 bit is not set, then operate as a Content (extended). + * - 0x1: Operate as an Object (basic or extended). * * As Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. * As Content, this does nothing special in regards to a leading '#'. @@ -224,7 +225,7 @@ extern "C" { * @see fl_fss_extended_content_write() */ #if !defined(fl_fss_basic_object_write) || !defined(fl_fss_extended_object_write) || !defined(_di_fl_fss_extended_content_write_) - extern void private_fl_fss_basic_write(const bool object_as, const f_string_static_t object, const uint8_t quote, f_range_t * const range, f_string_dynamic_t * const destination, f_state_t * const state, void * const internal) F_attribute_visibility_internal_d; + extern void private_fl_fss_basic_write(const uint8_t flag, const f_string_static_t object, const uint8_t quote, f_range_t * const range, f_string_dynamic_t * const destination, f_state_t * const state, void * const internal) F_attribute_visibility_internal_d; #endif // !defined(fl_fss_basic_object_write) || !defined(fl_fss_extended_object_write) || !defined(_di_fl_fss_extended_content_write_) /** diff --git a/level_1/fl_fss/data/tests/contents/extended-all_read-3.txt b/level_1/fl_fss/data/tests/contents/extended-all_read-3.txt index 3f63162..f652283 100644 --- a/level_1/fl_fss/data/tests/contents/extended-all_read-3.txt +++ b/level_1/fl_fss/data/tests/contents/extended-all_read-3.txt @@ -6,7 +6,7 @@ 7 8 - +d diff --git a/level_1/fl_fss/data/tests/contents/extended-all_read-4.txt b/level_1/fl_fss/data/tests/contents/extended-all_read-4.txt index 23633ef..1441742 100644 --- a/level_1/fl_fss/data/tests/contents/extended-all_read-4.txt +++ b/level_1/fl_fss/data/tests/contents/extended-all_read-4.txt @@ -6,7 +6,7 @@ - +9 diff --git a/level_1/fl_fss/data/tests/contents/extended-all_read-5.txt b/level_1/fl_fss/data/tests/contents/extended-all_read-5.txt index 88e9613..e52f41a 100644 --- a/level_1/fl_fss/data/tests/contents/extended-all_read-5.txt +++ b/level_1/fl_fss/data/tests/contents/extended-all_read-5.txt @@ -31,7 +31,7 @@ -' +' diff --git a/level_1/fl_fss/data/tests/contents/extended-all_read-6.txt b/level_1/fl_fss/data/tests/contents/extended-all_read-6.txt new file mode 100644 index 0000000..a06ec43 --- /dev/null +++ b/level_1/fl_fss/data/tests/contents/extended-all_read-6.txt @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +space! + +41 +42 + + + + + + + diff --git a/level_1/fl_fss/data/tests/contents/extended-all_read-7.txt b/level_1/fl_fss/data/tests/contents/extended-all_read-7.txt new file mode 100644 index 0000000..b416b73 --- /dev/null +++ b/level_1/fl_fss/data/tests/contents/extended-all_read-7.txt @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +39 + + + + + + + + + + diff --git a/level_1/fl_fss/tests/unit/c/test-fss-extended_content_read.c b/level_1/fl_fss/tests/unit/c/test-fss-extended_content_read.c index 2e6cdaa..c6637c1 100644 --- a/level_1/fl_fss/tests/unit/c/test-fss-extended_content_read.c +++ b/level_1/fl_fss/tests/unit/c/test-fss-extended_content_read.c @@ -148,6 +148,8 @@ void test__fl_fss_extended_content_read__works(void **void_state) { data__file_open__named_at__all_read("contents", "extended", 3), data__file_open__named_at__all_read("contents", "extended", 4), data__file_open__named_at__all_read("contents", "extended", 5), + data__file_open__named_at__all_read("contents", "extended", 6), + data__file_open__named_at__all_read("contents", "extended", 7), }; assert_non_null(file_strings); @@ -157,6 +159,8 @@ void test__fl_fss_extended_content_read__works(void **void_state) { assert_non_null(file_contents[3]); assert_non_null(file_contents[4]); assert_non_null(file_contents[5]); + assert_non_null(file_contents[6]); + assert_non_null(file_contents[7]); size_t max = 0; char *line_string = 0; @@ -167,9 +171,11 @@ void test__fl_fss_extended_content_read__works(void **void_state) { 0, 0, 0, + 0, + 0, }; ssize_t result = 0; - const uint8_t total_content = 6; + const uint8_t total_content = 8; f_string_static_t buffer_string = f_string_static_t_initialize; @@ -195,10 +201,10 @@ void test__fl_fss_extended_content_read__works(void **void_state) { buffer_string.used = (f_number_unsigned_t) result; buffer_string.size = buffer_string.used; - max = 255; - for (uint8_t i = 0; i < total_content; ++i) { + max = 255; + result = getline(&line_content[i], &max, file_contents[i]); assert_return_code(result, 0); @@ -230,7 +236,7 @@ void test__fl_fss_extended_content_read__works(void **void_state) { if (state.status == F_fss_found_content) { assert_true(found.used); - assert_true(found.used < total_content); + assert_true(found.used <= (f_number_unsigned_t) total_content); { const f_status_t status = f_string_dynamic_append(buffer_string, &delimit_string); @@ -273,25 +279,20 @@ void test__fl_fss_extended_content_read__works(void **void_state) { } if (line_string) free(line_string); - if (line_content[0]) free(line_content[0]); - if (line_content[1]) free(line_content[1]); - if (line_content[2]) free(line_content[2]); - if (line_content[3]) free(line_content[3]); - if (line_content[4]) free(line_content[4]); - if (line_content[5]) free(line_content[5]); if (result_string.string) free(result_string.string); if (delimit_string.string) free(delimit_string.string); if (delimits.array) free(delimits.array); if (found.array) free(found.array); if (quotes.array) free(quotes.array); + for (uint8_t i = 0; i < total_content; ++i) { + + if (line_content[i]) free(line_content[i]); + + line_content[i] = 0; + } // for + line_string = 0; - line_content[0] = 0; - line_content[1] = 0; - line_content[2] = 0; - line_content[3] = 0; - line_content[4] = 0; - line_content[5] = 0; result_string.string = 0; result_string.used = 0; result_string.size = 0; @@ -309,24 +310,20 @@ void test__fl_fss_extended_content_read__works(void **void_state) { quotes.size = 0; } // for + for (uint8_t i = 0; i < total_content; ++i) { + + if (file_contents[i]) fclose(file_contents[i]); + if (line_content[i]) free(line_content[i]); + + line_content[i] = 0; + } // for + if (file_strings) fclose(file_strings); - if (file_contents[0]) fclose(file_contents[0]); - if (file_contents[1]) fclose(file_contents[1]); - if (file_contents[2]) fclose(file_contents[2]); - if (file_contents[3]) fclose(file_contents[3]); - if (file_contents[4]) fclose(file_contents[4]); - if (file_contents[5]) fclose(file_contents[5]); if (delimits.array) free(delimits.array); if (found.array) free(found.array); if (quotes.array) free(quotes.array); if (line_string) free(line_string); - if (line_content[0]) free(line_content[0]); - if (line_content[1]) free(line_content[1]); - if (line_content[2]) free(line_content[2]); - if (line_content[3]) free(line_content[3]); - if (line_content[4]) free(line_content[4]); - if (line_content[5]) free(line_content[5]); if (result_string.string) free(result_string.string); if (delimit_string.string) free(delimit_string.string); } -- 1.8.3.1