From: Kevin Day Date: Thu, 25 Jan 2024 04:50:30 +0000 (-0600) Subject: Bugfix: FSS Extended read fails to handle non-terminated quote as per specification. X-Git-Tag: 0.6.9~37 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=0000b3de473965cda58200f129f2990e2efbe60b;p=fll Bugfix: FSS Extended read fails to handle non-terminated quote as per specification. This is a follow up to the problems discovered while writing unit tests (from the 0.7 development branch) and referenced in this commit: 70cbbe34121dc5679961df711e70724f13104489. Given the line: a " b c d. The following Content should now be returned: [0] = " [1] = b [2] = c [3] = d. This adds a new FSS state flag 'f_fss_state_quote_not_e' to give the caller the ability to manually designate that the quotes are being disabled. Currently only the FSS Extended utilizes this flag. Refactor the private function private_fl_fss_basic_read() into private_fl_fss_basic_or_extended_read() to make it more explicitly clear that it provides functionality for both FSS Basic and FSS Extended. This changes the code such that when an unterminated quote is detected then the calling function will set the disable quote flag and then call the function again. --- diff --git a/level_0/f_fss/c/fss/common.h b/level_0/f_fss/c/fss/common.h index 329383d..03dcfc8 100644 --- a/level_0/f_fss/c/fss/common.h +++ b/level_0/f_fss/c/fss/common.h @@ -378,11 +378,13 @@ enum { * f_fss_state_flag_*: * - none: No flags are set. * - utf_fail_on_valid_not: Immediately fail on invalid UTF-8 character (including incomplete). + * - quote_not: Disable processing as quoted text (all found quotes are ignored and no quote-escaping is performed). 
*/ #ifndef _di_f_fss_state_flags_ enum { f_fss_state_flag_none_e = 0, f_fss_state_flag_utf_fail_on_valid_not_e = 0x1, + f_fss_state_quote_not_e = 0x2, }; // enum #endif // _di_f_fss_state_flags_ diff --git a/level_1/fl_fss/c/fss/basic.c b/level_1/fl_fss/c/fss/basic.c index 7dd9849..7ef47cb 100644 --- a/level_1/fl_fss/c/fss/basic.c +++ b/level_1/fl_fss/c/fss/basic.c @@ -152,7 +152,7 @@ extern "C" { const f_array_length_t delimits_used = delimits->used; - const f_status_t status = private_fl_fss_basic_read(buffer, F_true, state, range, found, quote, delimits); + const f_status_t status = private_fl_fss_basic_or_extended_read(buffer, 0x1, state, range, found, quote, delimits); // The private function sets the error bit on unterminated quoted Object. if (status == F_status_set_error(F_fss_found_object_content_not)) { diff --git a/level_1/fl_fss/c/fss/extended.c b/level_1/fl_fss/c/fss/extended.c index 3b71ea1..f12da3c 100644 --- a/level_1/fl_fss/c/fss/extended.c +++ b/level_1/fl_fss/c/fss/extended.c @@ -39,9 +39,17 @@ extern "C" { f_string_range_t content_partial = f_string_range_t_initialize; - status = private_fl_fss_basic_read(buffer, F_false, state, range, &content_partial, &quote, delimits); + status = private_fl_fss_basic_or_extended_read(buffer, (state.flag & f_fss_state_quote_not_e) ? 0x2 : 0x0, state, range, &content_partial, &quote, delimits); - if (status == F_fss_found_object || F_status_set_fine(status) == F_fss_found_object_content_not) { + // Quote is unterminated, retry with quotes disabled as per-specification (this error should not be reported when quotes are disabled). 
+ if (status == F_status_set_error(F_fss_found_object_content_not)) { + content_partial.start = 1; + content_partial.stop = 0; + + status = private_fl_fss_basic_or_extended_read(buffer, 0x2, state, range, &content_partial, &quote, delimits); + } + + if (status == F_fss_found_object || status == F_fss_found_object_content_not) { status_allocate = f_string_ranges_increase(state.step_small, found); // The private function sets the error bit on unterminated quoted Object. @@ -193,7 +201,7 @@ extern "C" { const f_array_length_t delimits_used = delimits->used; - const f_status_t status = private_fl_fss_basic_read(buffer, F_true, state, range, found, quote, delimits); + const f_status_t status = private_fl_fss_basic_or_extended_read(buffer, 0x1, state, range, found, quote, delimits); // The private function sets the error bit on unterminated quoted Object. if (status == F_status_set_error(F_fss_found_object_content_not)) { diff --git a/level_1/fl_fss/c/fss/extended.h b/level_1/fl_fss/c/fss/extended.h index f8edcba..51a99c3 100644 --- a/level_1/fl_fss/c/fss/extended.h +++ b/level_1/fl_fss/c/fss/extended.h @@ -45,6 +45,9 @@ extern "C" { * When state.interrupt() returns, only F_interrupt and F_interrupt_not are processed. * Error bit designates an error but must be passed along with F_interrupt. * All other statuses are ignored. + * + * The following bits are supported when set on state.flags: + * - f_fss_state_quote_not_e: Explicitly disable quotes, treating quotes and their respective delimits as normal characters. * @param range * The start/stop location within the buffer to be processed. * The start location will be updated as the buffer is being processed. @@ -54,6 +57,7 @@ extern "C" { * A set of all locations where a valid content was found. * @param quotes * This will store the quote types representing the character to use (from the f_fss_quote_type_*_e). + * Each index in quotes represents a position within the found array index. * Set pointer address to 0 to not use. 
* @param delimits * A delimits array representing where delimits exist within the buffer. diff --git a/level_1/fl_fss/c/private-fss.c b/level_1/fl_fss/c/private-fss.c index a468e36..5ee7bb6 100644 --- a/level_1/fl_fss/c/private-fss.c +++ b/level_1/fl_fss/c/private-fss.c @@ -123,7 +123,7 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_list_object_write_) || !defined(_di_fl_fss_extended_list_object_write_) #if !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) - f_status_t private_fl_fss_basic_read(const f_string_static_t buffer, const bool object_as, f_state_t state, f_string_range_t * const range, f_fss_object_t * const found, f_fss_quote_t * const quote, f_fss_delimits_t * const delimits) { + f_status_t private_fl_fss_basic_or_extended_read(const f_string_static_t buffer, const uint8_t flag, f_state_t state, f_string_range_t * const range, f_fss_object_t * const found, f_fss_quote_t * const quote, f_fss_delimits_t * const delimits) { f_status_t status = f_fss_skip_past_space(state, buffer, range); if (F_status_is_error(status)) return status; @@ -144,10 +144,11 @@ extern "C" { const f_array_length_t delimits_used = delimits->used; // Begin the search. + const f_array_length_t begin = range->start; found->start = range->start; // Ignore all comment lines. - if (object_as && buffer.string[range->start] == f_fss_comment_s.string[0]) { + if ((flag & 0x1) && buffer.string[range->start] == f_fss_comment_s.string[0]) { while (buffer.string[range->start] != f_fss_eol_s.string[0]) { @@ -220,7 +221,13 @@ extern "C" { // Found the end of the object while processing the slash for potential delimits. 
if (status == F_true) { - found->stop = range->start - 1; + if (range->start > begin) { + found->stop = range->start - 1; + } + else { + found->start = 1; + found->stop = 0; + } status = f_utf_buffer_increment(buffer, range, 1); if (F_status_is_error(status)) break; @@ -253,7 +260,7 @@ extern "C" { return F_none_stop; } - if (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_backtick_s.string[0] || (object_as && buffer.string[range->start] == f_fss_comment_s.string[0])) { + if ((!(flag & 0x2) && (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_backtick_s.string[0])) || ((flag & 0x1) && buffer.string[range->start] == f_fss_comment_s.string[0])) { // Only the first slash before a quote needs to be escaped (or not) as once there is a slash before a quote, this cannot ever be a quote object. // This simplifies the number of slashes needed. 
@@ -266,7 +273,7 @@ extern "C" { if (F_status_is_error(status)) return status; } } - else if (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_delimit_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_backtick_s.string[0]) { + else if (!(flag & 0x2) && (buffer.string[range->start] == f_fss_quote_single_s.string[0] || buffer.string[range->start] == f_fss_delimit_quote_double_s.string[0] || buffer.string[range->start] == f_fss_quote_backtick_s.string[0])) { quote_found = buffer.string[range->start]; status = f_utf_buffer_increment(buffer, range, 1); @@ -543,7 +550,13 @@ extern "C" { } } - found->stop = range->start - 1; + if (range->start > begin) { + found->stop = range->start - 1; + } + else { + found->start = 1; + found->stop = 0; + } status = f_utf_buffer_increment(buffer, range, 1); if (F_status_is_error(status)) return status; @@ -627,18 +640,31 @@ extern "C" { } } else if (buffer.string[range->start] == f_fss_eol_s.string[0]) { + if (flag & 0x1) { - // The quote is incomplete, so treat the entire line as the Object as per the specification (including the quotes). - // The error bit is set to designate that the Object is found in an erroneous state (not having a terminating quote). - found->start -= 1; - found->stop = range->start - 1; + // The quote is incomplete, so treat the entire line as the Object as per the specification (including the quotes). + // The error bit is set to designate that the Object is found in an erroneous state (not having a terminating quote). + if (found->start > begin && range->start > begin) { + found->start -= 1; + found->stop = range->start - 1; + } + else { + found->start = 1; + found->stop = 0; + } + + // Move the start position to after the EOL. + ++range->start; + } + else { + + // The quote is incomplete, do not save and reset range->start so that caller can fix the position and re-run with quotes disabled. 
+ range->start = begin; + } // The delimits cannot be preserved in this case as per specification. delimits->used = delimits_used; - // Move the start position to after the EOL. - ++range->start; - return F_status_set_error(F_fss_found_object_content_not); } @@ -684,7 +710,13 @@ extern "C" { if (F_status_is_error(status)) return status; - found->stop = range->start - 1; + if (range->start > begin) { + found->stop = range->start - 1; + } + else { + found->start = 1; + found->stop = 0; + } if (buffer.string[range->start] == f_fss_eol_s.string[0]) { @@ -712,7 +744,7 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) #if !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) - f_status_t private_fl_fss_basic_write(const bool object_as, const f_string_static_t object, const f_fss_quote_t quote, f_state_t state, f_string_range_t * const range, f_string_dynamic_t * const destination) { + f_status_t private_fl_fss_basic_write(const uint8_t flag, const f_string_static_t object, const f_fss_quote_t quote, f_state_t state, f_string_range_t * const range, f_string_dynamic_t * const destination) { f_status_t status = f_fss_skip_past_space(state, object, range); if (F_status_is_error(status)) return status; @@ -746,7 +778,7 @@ extern "C" { if (object.string[input_start] == quote_char) { quote_is = F_true; } - else if (object_as && object.string[input_start] == f_fss_comment_s.string[0]) { + else if ((flag & 0x1) && object.string[input_start] == f_fss_comment_s.string[0]) { commented = F_true; } @@ -888,7 +920,7 @@ extern "C" { destination->string[destination->used++] = object.string[range->start + i]; } // for } - else if (object_as && object.string[range->start] == f_fss_comment_s.string[0]) { + else if ((flag & 0x1) && object.string[range->start] == f_fss_comment_s.string[0]) { // Only the first slash needs to be escaped for a comment, 
and then only if not quote. if (item_first == input_start) { @@ -1099,9 +1131,7 @@ extern "C" { destination->string[used_start] = f_fss_delimit_slash_s.string[0]; } - if (range->start > range->stop) return F_none_stop; - - return F_none_eos; + return range->start > range->stop ? F_none_stop : F_none_eos; } #endif // !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) diff --git a/level_1/fl_fss/c/private-fss.h b/level_1/fl_fss/c/private-fss.h index 4ab48eb..baf69ee 100644 --- a/level_1/fl_fss/c/private-fss.h +++ b/level_1/fl_fss/c/private-fss.h @@ -83,17 +83,19 @@ extern "C" { #endif // !defined(_di_fl_fss_basic_list_object_write_) || !defined(_di_fl_fss_extended_list_object_write_) /** - * Private implementation of fl_fss_basic_object_read(). + * Provide common processing for Basic and Extended Object and Content read. * * Intended to be shared to each of the different implementation variations. * * @param buffer * The buffer to read from. - * @param object_as - * If TRUE, then this operate as an Object. - * IF FALSE, then this operates as a Content. + * @param flag + * Bits: + * - 0x0: When 0x1 bit is not set, then operate as a Content (extended). + * - 0x1: Operate as an Object (basic or extended). + * - 0x2: Operate with quoting disabled, treating all quotes and escaped quotes as literal (extended). * - * As Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. + * As an Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. * As Content, this does nothing special in regards to a leading '#'. * @param state * A state for providing flags and handling interrupts during long running operations. 
@@ -155,7 +157,7 @@ extern "C" { * @see fl_fss_extended_content_read() */ #if !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) - extern f_status_t private_fl_fss_basic_read(const f_string_static_t buffer, const bool object_as, f_state_t state, f_string_range_t * const range, f_fss_object_t * const found, f_fss_quote_t * const quote, f_fss_delimits_t * const delimits) F_attribute_visibility_internal_d; + extern f_status_t private_fl_fss_basic_or_extended_read(const f_string_static_t buffer, const uint8_t flag, f_state_t state, f_string_range_t * const range, f_fss_object_t * const found, f_fss_quote_t * const quote, f_fss_delimits_t * const delimits) F_attribute_visibility_internal_d; #endif // !defined(_di_fl_fss_basic_object_read_) || !defined(_di_fl_fss_extended_object_read_) || !defined(_di_fl_fss_extended_content_read_) /** @@ -165,9 +167,10 @@ extern "C" { * * Note: this does not attempt to "complete" the object. * - * @param object_as - * If TRUE, then this operate as an Object. - * IF FALSE, then this operates as a Content. + * @param flag + * Bits: + * - 0x0: When 0x1 bit is not set, then operate as a Content (extended). + * - 0x1: Operate as an Object (basic or extended). * * As Object, this checks if the first graph character is a comment character '#', or an escaped comment character '#'. * As Content, this does nothing special in regards to a leading '#'. 
@@ -219,7 +222,7 @@ extern "C" { * @see fl_fss_extended_content_write() */ #if !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) - extern f_status_t private_fl_fss_basic_write(const bool object_as, const f_string_static_t object, const f_fss_quote_t quote, f_state_t state, f_string_range_t * const range, f_string_dynamic_t * const destination) F_attribute_visibility_internal_d; + extern f_status_t private_fl_fss_basic_write(const uint8_t flag, const f_string_static_t object, const f_fss_quote_t quote, f_state_t state, f_string_range_t * const range, f_string_dynamic_t * const destination) F_attribute_visibility_internal_d; #endif // !defined(_di_fl_fss_basic_object_write_) || !defined(_di_fl_fss_extended_object_write_) || !defined(_di_fl_fss_extended_content_write_) /**