From 4f0c01eb8e34762daf89996a2910c4017a5c6b3e Mon Sep 17 00:00:00 2001
From: Kevin Day <thekevinday@gmail.com>
Date: Fri, 25 Feb 2022 23:04:47 -0600
Subject: [PATCH] Bugfix: Improper IKI vocabulary position for when a quote is
 immediately to the left of the vocabulary name.

I distantly remember attempting to shorthand and optimize the logic to reduce the while loop usage.
This appears to have backfired in that I missed one of the possible conditions.

An example of this is the following bash command utilizing IKI substitution:
  echo "IKI Path is 'define:"PATH"'"

In the above string, the bug is claiming the IKI variable is "'define".
The quote is not a word character, not a dash, and not a plus.
The IKI vocabulary is supposed to be "define".

Redesign this back to have the multiple loops.
While this is messier, it should be harder to make a logic mistake.

Cleanup some of the logic to not need the private_f_iki_seek_special() function.

Make sure the width is always calculated prior to calling the UTF aware string functions.

Update wording in iki.txt specification to make a little more sense.
---
 level_0/f_iki/c/iki.c         | 140 +++++++++++++++-------------------
 level_0/f_iki/c/private-iki.c |  42 ----------
 level_0/f_iki/c/private-iki.h |  25 ------
 specifications/iki.txt        |   2 +-
 4 files changed, 62 insertions(+), 147 deletions(-)

diff --git a/level_0/f_iki/c/iki.c b/level_0/f_iki/c/iki.c
index ce3d5d105..a09643ada 100644
--- a/level_0/f_iki/c/iki.c
+++ b/level_0/f_iki/c/iki.c
@@ -78,20 +78,6 @@ extern "C" {
       width_max = buffer->used - range->start;
     }
 
-    status = private_f_iki_seek_special(*buffer, range);
-
-    if (F_status_is_error(status)) {
-      return status;
-    }
-
-    if (range->start > range->stop) {
-      return F_data_not_stop;
-    }
-
-    if (range->start >= buffer->used) {
-      return F_data_not_eos;
-    }
-
     f_string_range_t found_vocabulary = f_string_range_t_initialize;
     f_array_length_t found_content = 0;
     f_array_length_t vocabulary_slash_first = range->start;
@@ -100,14 +86,38 @@ extern "C" {
     uint8_t quote = 0;
 
     bool vocabulary_delimited = F_false;
-    bool find_next = F_false;
-
-    found_vocabulary.start = range->start;
 
     do {
 
-      // Find the start and end of the vocabulary name.
-      while (range->start <= range->stop && range->start < buffer->used) {
+      // Find the start of the vocabulary name.
+      while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) {
+
+        if (state.interrupt) {
+          status = state.interrupt((void *) &state, 0);
+
+          if (F_status_set_fine(status) == F_interrupt) {
+            status = F_status_set_error(F_interrupt);
+
+            break;
+          }
+        }
+
+        width_max = buffer->used - range->start;
+
+        status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
+        if (F_status_is_error(status)) break;
+
+        if (status == F_true) {
+          found_vocabulary.start = range->start++;
+
+          break;
+        }
+
+        status = f_utf_buffer_increment(*buffer, range, 1);
+      } // while
+
+      // Find the end of the vocabulary name.
+      while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) {
 
         if (state.interrupt) {
           status = state.interrupt((void *) &state, 0);
@@ -147,11 +157,8 @@ extern "C" {
 
             break;
           }
-
-          break;
         }
-
-        if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) {
+        else if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) {
           bool separator_found = F_false;
 
           vocabulary_slash_first = range->start;
@@ -159,6 +166,16 @@ extern "C" {
           // The slash only needs to be delimited if it were to otherwise be a valid vocabulary name.
           while (range->start <= range->stop && range->start < buffer->used) {
 
+            if (state.interrupt) {
+              status = state.interrupt((void *) &state, 0);
+
+              if (F_status_set_fine(status) == F_interrupt) {
+                status = F_status_set_error(F_interrupt);
+
+                break;
+              }
+            }
+
             if (buffer->string[range->start] == f_iki_syntax_placeholder_s.string[0]) {
               ++range->start;
 
@@ -169,21 +186,14 @@ extern "C" {
               if (buffer->string[range->start] == f_iki_syntax_quote_single_s.string[0] || buffer->string[range->start] == f_iki_syntax_quote_double_s.string[0]) {
                 vocabulary_delimited = F_true;
                 quote = buffer->string[range->start++];
-
-                break;
               }
-              else {
-                find_next = F_true;
 
-                break;
-              }
+              break;
             }
             else if (buffer->string[range->start] == f_iki_syntax_separator_s.string[0]) {
               separator_found = F_true;
             }
             else if (buffer->string[range->start] != f_iki_syntax_slash_s.string[0]) {
-              find_next = F_true;
-
               break;
             }
 
@@ -191,47 +201,23 @@ extern "C" {
             if (F_status_is_error(status)) break;
           } // while
 
-          break;
+          if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break;
         }
-
-        width_max = (range->stop - range->start) + 1;
-
-        if (width_max > buffer->used - range->start) {
+        else {
           width_max = buffer->used - range->start;
-        }
 
-        status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
-        if (F_status_is_error(status)) break;
-
-        // Current word-dash-plus block is not a valid variable name, try again.
-        if (status == F_false) {
-          status = private_f_iki_seek_special(*buffer, range);
+          status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
           if (F_status_is_error(status)) break;
 
-          if (range->start > range->stop) {
-            data->delimits.used = delimits_used;
-
-            return F_data_not_stop;
-          }
-
-          if (range->start >= buffer->used) {
-            data->delimits.used = delimits_used;
-
-            return F_data_not_eos;
-          }
-
-          found_vocabulary.start = range->start;
+          // Not a valid IKI vocabulary name.
+          if (status != F_true) break;
         }
 
         status = f_utf_buffer_increment(*buffer, range, 1);
         if (F_status_is_error(status)) break;
       } // while
 
-      if (F_status_is_error(status)) {
-        data->delimits.used = delimits_used;
-
-        return status;
-      }
+      if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break;
 
       // Process potentially valid content.
       if (quote) {
@@ -264,7 +250,6 @@ extern "C" {
 
               data->delimits.array[data->delimits.used++] = vocabulary_slash_first;
 
-              find_next = F_true;
               vocabulary_delimited = F_false;
               quote = 0;
 
@@ -301,7 +286,8 @@ extern "C" {
 
             return F_none;
           }
-          else if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) {
+
+          if (buffer->string[range->start] == f_iki_syntax_slash_s.string[0]) {
             f_array_length_t content_slash_first = range->start;
             f_array_length_t content_slash_total = 0;
 
@@ -355,11 +341,6 @@ extern "C" {
                     quote = 0;
 
                     ++range->start;
-
-                    // Skip past all initial non-word, non-dash, and non-plus.
-                    status = private_f_iki_seek_special(*buffer, range);
-
-                    found_vocabulary.start = range->start;
                   }
                   else {
                     status = f_string_ranges_increase(state.step_small, &data->variable);
@@ -409,26 +390,27 @@ extern "C" {
           status = f_utf_buffer_increment(*buffer, range, 1);
           if (F_status_is_error(status)) break;
         } // while
+
+        if (F_status_is_error(status)) break;
+
+        quote = 0;
       }
       else {
         vocabulary_delimited = F_false;
-        find_next = F_true;
       }
 
-      if (F_status_is_error(status)) {
-        data->delimits.used = delimits_used;
+      if (F_status_is_error(status) || range->start > range->stop || range->start >= buffer->used) break;
 
-        return status;
-      }
+      status = f_utf_buffer_increment(*buffer, range, 1);
+      if (F_status_is_error(status)) break;
 
-      if (find_next) {
-        status = private_f_iki_seek_special(*buffer, range);
+    } while (range->start <= range->stop && range->start < buffer->used);
 
-        found_vocabulary.start = range->start;
-        find_next = F_false;
-      }
+    if (F_status_is_error(status)) {
+      data->delimits.used = delimits_used;
 
-    } while (range->start <= range->stop && range->start < buffer->used);
+      return status;
+    }
 
     if (range->start > range->stop) {
       return F_data_not_stop;
diff --git a/level_0/f_iki/c/private-iki.c b/level_0/f_iki/c/private-iki.c
index 0a4727ee3..cc4b5dc9e 100644
--- a/level_0/f_iki/c/private-iki.c
+++ b/level_0/f_iki/c/private-iki.c
@@ -81,48 +81,6 @@ extern "C" {
   }
 #endif // !defined(_di_f_iki_datas_append_) || !defined(_di_f_iki_datas_decrease_by_) || !defined(_di_f_iki_datas_increase_) || !defined(_di_f_iki_datas_increase_by_) || !defined(_di_f_iki_datas_resize_)
 
-#if !defined(_di_f_iki_read_)
-  f_status_t private_f_iki_seek_special(const f_string_static_t buffer, f_string_range_t * const range) {
-
-    f_status_t status = F_none;
-    f_array_length_t width_max = 0;
-
-    while (range->start <= range->stop && range->start < buffer.used) {
-
-      if (buffer.string[range->start] == f_iki_syntax_separator_s.string[0]) {
-        break;
-      }
-
-      if (buffer.string[range->start] == f_iki_syntax_quote_double_s.string[0]) {
-        break;
-      }
-
-      if (buffer.string[range->start] == f_iki_syntax_quote_single_s.string[0]) {
-        break;
-      }
-
-      if (buffer.string[range->start] == f_iki_syntax_slash_s.string[0]) {
-        break;
-      }
-
-      width_max = (range->stop - range->start) + 1;
-
-      if (width_max > buffer.used - range->start) {
-        width_max = buffer.used - range->start;
-      }
-
-      status = f_utf_is_word_dash_plus(buffer.string + range->start, width_max, F_false);
-      if (F_status_is_error(status)) return status;
-      if (status == F_true) return status;
-
-      status = f_utf_buffer_increment(buffer, range, 1);
-      if (F_status_is_error(status)) return status;
-    } // while
-
-    return F_false;
-  }
-#endif // !defined(_di_f_iki_read_)
-
 #if !defined(_di_f_iki_content_is_) || !defined(_di_f_iki_content_partial_is_)
   f_status_t private_f_iki_content_partial_is(const f_string_static_t buffer, const f_string_range_t range, const char quote) {
 
diff --git a/level_0/f_iki/c/private-iki.h b/level_0/f_iki/c/private-iki.h
index 24c266479..658e96356 100644
--- a/level_0/f_iki/c/private-iki.h
+++ b/level_0/f_iki/c/private-iki.h
@@ -116,31 +116,6 @@ extern "C" {
   extern f_status_t private_f_iki_object_partial_is(const f_string_static_t buffer, const f_string_range_t range) F_attribute_visibility_internal_d;
 #endif // !defined(_di_f_iki_object_is_) || !defined(_di_f_iki_object_partial_is_)
 
-/**
- * Seek until a word, a dash, or a plus is found or is not found.
- *
- * This will ignore the delimit placeholder.
- * This will stop at any special characters, such as a colon, a single quote, a double quote, or a backslash.
- *
- * @param buffer
- *   The string to process.
- * @param range
- *   The range within the buffer that represents the current position being processed.
- *
- * @return
- *   F_true if stopped on a word, a dash, or a plus.
- *   F_false if stopped on a plus scolon, single quote, double quote, and backslash or stopped on something other than a word, a dash, or a plus.
- *
- *   Errors (with error bit) from: f_utf_buffer_increment().
- *   Errors (with error bit) from: f_utf_is_word_dash_plus().
- *
- * @see f_utf_buffer_increment()
- * @see f_utf_is_word_dash_plus()
- */
-#if !defined(_di_f_iki_read_)
-  extern f_status_t private_f_iki_seek_special(const f_string_static_t buffer, f_string_range_t * const range) F_attribute_visibility_internal_d;
-#endif // !defined(_di_f_iki_read_)
-
 #ifdef __cplusplus
 } // extern "C"
 #endif
diff --git a/specifications/iki.txt b/specifications/iki.txt
index 2b20438b0..40be258fe 100644
--- a/specifications/iki.txt
+++ b/specifications/iki.txt
@@ -11,7 +11,7 @@ IKI Specifications:
   The IKI format will use iki-0000 to represent an IKI with no explicitly defined vocabulary.
   Whereas iki-0001 and beyond represent a specific IKI vocabulary.
 
-  Whitespace, non-word (and non "_", "-", "+") character punctuations, or the start of file must exist before any valid variable name.
+  A potential IKI variable name starts on word (or "_", "-", "+") characters.
   Whitespace and non-word (and non "_", "-", "+") character punctuations may not exist as part of the variable name.
   The only Unicode dash-like characters allowed as a "dash" are those intended to connect, such as the Unicode hyphens (U+2010 and U+2011).
 
-- 
2.52.0