From 4f0c01eb8e34762daf89996a2910c4017a5c6b3e Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Fri, 25 Feb 2022 23:04:47 -0600 Subject: [PATCH] Bugfix: Improper IKI vocabulary position for when a quote is immediately to the left of the vocabulary name. I distantly remember attempting to shorthand and optimize the logic to reduce the while loop usage. This appears to have backfired in that I missed one of the possible conditions. An example of this is the following bash command utilizing IKI substitution: echo "IKI Path is 'define:"PATH"'" In the above string, the bug is claiming the IKI variable is "'define". The quote is not a word character, not a dash, and not a plus. The IKI vocabulary is supposed to be "define". Redesign this back to have the multiple loops. While this is messier, it should be harder to make a logic mistake. Clean up some of the logic to not need the private_f_iki_seek_special() function. Make sure the width is always calculated prior to calling the UTF aware string functions. Update wording in iki.txt specification to make a little more sense. 
--- level_0/f_iki/c/iki.c | 140 ++++++++++++++++++------------------------ level_0/f_iki/c/private-iki.c | 42 ------------- level_0/f_iki/c/private-iki.h | 25 -------- specifications/iki.txt | 2 +- 4 files changed, 62 insertions(+), 147 deletions(-) diff --git a/level_0/f_iki/c/iki.c b/level_0/f_iki/c/iki.c index ce3d5d1..a09643a 100644 --- a/level_0/f_iki/c/iki.c +++ b/level_0/f_iki/c/iki.c @@ -78,20 +78,6 @@ extern "C" { width_max = buffer->used - range->start; } - status = private_f_iki_seek_special(*buffer, range); - - if (F_status_is_error(status)) { - return status; - } - - if (range->start > range->stop) { - return F_data_not_stop; - } - - if (range->start >= buffer->used) { - return F_data_not_eos; - } - f_string_range_t found_vocabulary = f_string_range_t_initialize; f_array_length_t found_content = 0; f_array_length_t vocabulary_slash_first = range->start; @@ -100,14 +86,38 @@ extern "C" { uint8_t quote = 0; bool vocabulary_delimited = F_false; - bool find_next = F_false; - - found_vocabulary.start = range->start; do { - // Find the start and end of the vocabulary name. - while (range->start <= range->stop && range->start < buffer->used) { + // Find the start of the vocabulary name. + while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) { + + if (state.interrupt) { + status = state.interrupt((void *) &state, 0); + + if (F_status_set_fine(status) == F_interrupt) { + status = F_status_set_error(F_interrupt); + + break; + } + } + + width_max = buffer->used - range->start; + + status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false); + if (F_status_is_error(status)) break; + + if (status == F_true) { + found_vocabulary.start = range->start++; + + break; + } + + status = f_utf_buffer_increment(*buffer, range, 1); + } // while + + // Find the end of the vocabulary name. 
+ while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) { if (state.interrupt) { status = state.interrupt((void *) &state, 0); @@ -147,11 +157,8 @@ extern "C" { break; } - - break; } - - if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) { + else if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) { bool separator_found = F_false; vocabulary_slash_first = range->start; @@ -159,6 +166,16 @@ extern "C" { // The slash only needs to be delimited if it were to otherwise be a valid vocabulary name. while (range->start <= range->stop && range->start < buffer->used) { + if (state.interrupt) { + status = state.interrupt((void *) &state, 0); + + if (F_status_set_fine(status) == F_interrupt) { + status = F_status_set_error(F_interrupt); + + break; + } + } + if (buffer->string[range->start] == f_iki_syntax_placeholder_s.string[0]) { ++range->start; @@ -169,21 +186,14 @@ extern "C" { if (buffer->string[range->start] == f_iki_syntax_quote_single_s.string[0] || buffer->string[range->start] == f_iki_syntax_quote_double_s.string[0]) { vocabulary_delimited = F_true; quote = buffer->string[range->start++]; - - break; } - else { - find_next = F_true; - break; - } + break; } else if (buffer->string[range->start] == f_iki_syntax_separator_s.string[0]) { separator_found = F_true; } else if (buffer->string[range->start] != f_iki_syntax_slash_s.string[0]) { - find_next = F_true; - break; } @@ -191,47 +201,23 @@ extern "C" { if (F_status_is_error(status)) break; } // while - break; + if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break; } - - width_max = (range->stop - range->start) + 1; - - if (width_max > buffer->used - range->start) { + else { width_max = buffer->used - range->start; - } - status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false); - if (F_status_is_error(status)) break; - - // Current word-dash-plus block is not a valid 
variable name, try again. - if (status == F_false) { - status = private_f_iki_seek_special(*buffer, range); + status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false); if (F_status_is_error(status)) break; - if (range->start > range->stop) { - data->delimits.used = delimits_used; - - return F_data_not_stop; - } - - if (range->start >= buffer->used) { - data->delimits.used = delimits_used; - - return F_data_not_eos; - } - - found_vocabulary.start = range->start; + // Not a valid IKI vocabulary name. + if (status != F_true) break; } status = f_utf_buffer_increment(*buffer, range, 1); if (F_status_is_error(status)) break; } // while - if (F_status_is_error(status)) { - data->delimits.used = delimits_used; - - return status; - } + if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break; // Process potentially valid content. if (quote) { @@ -264,7 +250,6 @@ extern "C" { data->delimits.array[data->delimits.used++] = vocabulary_slash_first; - find_next = F_true; vocabulary_delimited = F_false; quote = 0; @@ -301,7 +286,8 @@ extern "C" { return F_none; } - else if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) { + + if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) { f_array_length_t content_slash_first = range->start; f_array_length_t content_slash_total = 0; @@ -355,11 +341,6 @@ extern "C" { quote = 0; ++range->start; - - // Skip past all initial non-word, non-dash, and non-plus. 
- status = private_f_iki_seek_special(*buffer, range); - - found_vocabulary.start = range->start; } else { status = f_string_ranges_increase(state.step_small, &data->variable); @@ -409,26 +390,27 @@ extern "C" { status = f_utf_buffer_increment(*buffer, range, 1); if (F_status_is_error(status)) break; } // while + + if (F_status_is_error(status)) break; + + quote = 0; } else { vocabulary_delimited = F_false; - find_next = F_true; } - if (F_status_is_error(status)) { - data->delimits.used = delimits_used; + if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break; - return status; - } + status = f_utf_buffer_increment(*buffer, range, 1); + if (F_status_is_error(status)) break; - if (find_next) { - status = private_f_iki_seek_special(*buffer, range); + } while (range->start <= range->stop && range->start < buffer->used); - found_vocabulary.start = range->start; - find_next = F_false; - } + if (F_status_is_error(status)) { + data->delimits.used = delimits_used; - } while (range->start <= range->stop && range->start < buffer->used); + return status; + } if (range->start > range->stop) { return F_data_not_stop; diff --git a/level_0/f_iki/c/private-iki.c b/level_0/f_iki/c/private-iki.c index 0a4727e..cc4b5dc 100644 --- a/level_0/f_iki/c/private-iki.c +++ b/level_0/f_iki/c/private-iki.c @@ -81,48 +81,6 @@ extern "C" { } #endif // !defined(_di_f_iki_datas_append_) || !defined(_di_f_iki_datas_decrease_by_) || !defined(_di_f_iki_datas_increase_) || !defined(_di_f_iki_datas_increase_by_) || !defined(_di_f_iki_datas_resize_) -#if !defined(_di_f_iki_read_) - f_status_t private_f_iki_seek_special(const f_string_static_t buffer, f_string_range_t * const range) { - - f_status_t status = F_none; - f_array_length_t width_max = 0; - - while (range->start <= range->stop && range->start < buffer.used) { - - if (buffer.string[range->start] == f_iki_syntax_separator_s.string[0]) { - break; - } - - if (buffer.string[range->start] == 
f_iki_syntax_quote_double_s.string[0]) { - break; - } - - if (buffer.string[range->start] == f_iki_syntax_quote_single_s.string[0]) { - break; - } - - if (buffer.string[range->start] == f_iki_syntax_slash_s.string[0]) { - break; - } - - width_max = (range->stop - range->start) + 1; - - if (width_max > buffer.used - range->start) { - width_max = buffer.used - range->start; - } - - status = f_utf_is_word_dash_plus(buffer.string + range->start, width_max, F_false); - if (F_status_is_error(status)) return status; - if (status == F_true) return status; - - status = f_utf_buffer_increment(buffer, range, 1); - if (F_status_is_error(status)) return status; - } // while - - return F_false; - } -#endif // !defined(_di_f_iki_read_) - #if !defined(_di_f_iki_content_is_) || !defined(_di_f_iki_content_partial_is_) f_status_t private_f_iki_content_partial_is(const f_string_static_t buffer, const f_string_range_t range, const char quote) { diff --git a/level_0/f_iki/c/private-iki.h b/level_0/f_iki/c/private-iki.h index 24c2664..658e963 100644 --- a/level_0/f_iki/c/private-iki.h +++ b/level_0/f_iki/c/private-iki.h @@ -116,31 +116,6 @@ extern "C" { extern f_status_t private_f_iki_object_partial_is(const f_string_static_t buffer, const f_string_range_t range) F_attribute_visibility_internal_d; #endif // !defined(_di_f_iki_object_is_) || !defined(_di_f_iki_object_partial_is_) -/** - * Seek until a word, a dash, or a plus is found or is not found. - * - * This will ignore the delimit placeholder. - * This will stop at any special characters, such as a colon, a single quote, a double quote, or a backslash. - * - * @param buffer - * The string to process. - * @param range - * The range within the buffer that represents the current position being processed. - * - * @return - * F_true if stopped on a word, a dash, or a plus. - * F_false if stopped on a plus scolon, single quote, double quote, and backslash or stopped on something other than a word, a dash, or a plus. 
- * - * Errors (with error bit) from: f_utf_buffer_increment(). - * Errors (with error bit) from: f_utf_is_word_dash_plus(). - * - * @see f_utf_buffer_increment() - * @see f_utf_is_word_dash_plus() - */ -#if !defined(_di_f_iki_read_) - extern f_status_t private_f_iki_seek_special(const f_string_static_t buffer, f_string_range_t * const range) F_attribute_visibility_internal_d; -#endif // !defined(_di_f_iki_read_) - #ifdef __cplusplus } // extern "C" #endif diff --git a/specifications/iki.txt b/specifications/iki.txt index 2b20438..40be258 100644 --- a/specifications/iki.txt +++ b/specifications/iki.txt @@ -11,7 +11,7 @@ IKI Specifications: The IKI format will use iki-0000 to represent an IKI with no explicitly defined vocabulary. Whereas iki-0001 and beyond represent a specific IKI vocabulary. - Whitespace, non-word (and non "_", "-", "+") character punctuations, or the start of file must exist before any valid variable name. + A potential IKI variable name starts on word (or "_", "-", "+") characters. Whitespace and non-word (and non "_", "-", "+") character punctuations may not exist as part of the variable name. The only Unicode dash-like characters allowed as a "dash" are those intended to connect, such as the Unicode hyphens (U+2010 and U+2011). -- 1.8.3.1