Update: Utilize the state.flag to allow for iki read to not fail out on invalid UTF...

author Kevin Day <thekevinday@gmail.com>

Wed, 11 May 2022 03:19:54 +0000 (22:19 -0500)

committer Kevin Day <thekevinday@gmail.com>

Wed, 11 May 2022 04:44:48 +0000 (23:44 -0500)
author Kevin Day <thekevinday@gmail.com>
Wed, 11 May 2022 03:19:54 +0000 (22:19 -0500)
committer Kevin Day <thekevinday@gmail.com>
Wed, 11 May 2022 04:44:48 +0000 (23:44 -0500)
diff --git a/level_0/f_iki/c/iki.c b/level_0/f_iki/c/iki.c

index 49c68b61de1d4d5c497fdbe0b22d1a5076713a2d..5f58e6fa20b8b72a91e38b69c288fc2fededbd00 100644 (file)
--- a/level_0/f_iki/c/iki.c
+++ b/level_0/f_iki/c/iki.c
@@ -112,7 +112,7 @@ extern "C" {
      do {
  
        // Find the start of the vocabulary name.
-      while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) {
+      while (range->start <= range->stop && range->start < buffer->used) {
  
          if (state.interrupt) {
            status = state.interrupt((void *) &state, 0);
@@ -127,7 +127,19 @@ extern "C" {
          width_max = buffer->used - range->start;
  
          status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
-        if (F_status_is_error(status)) break;
+
+        if (F_status_is_error(status)) {
+          if (F_status_set_fine(status) == F_utf_fragment || F_status_set_fine(status) == F_complete_not_utf) {
+            if (state.flag & f_iki_state_flag_utf_fail_on_valid_not_e) {
+              break;
+            }
+
+            status = F_false;
+          }
+          else {
+            break;
+          }
+        }
  
          if (status == F_true) {
            found_vocabulary.start = range->start++;
@@ -136,6 +148,7 @@ extern "C" {
          }
  
          status = f_utf_buffer_increment(*buffer, range, 1);
+        if (F_status_is_error(status)) break;
        } // while
  
        // Find the end of the vocabulary name.
@@ -232,7 +245,19 @@ extern "C" {
            width_max = buffer->used - range->start;
  
            status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
-          if (F_status_is_error(status)) break;
+
+          if (F_status_is_error(status)) {
+            if (F_status_set_fine(status) == F_utf_fragment || F_status_set_fine(status) == F_complete_not_utf) {
+              if (state.flag & f_iki_state_flag_utf_fail_on_valid_not_e) {
+                break;
+              }
+
+              status = F_false;
+            }
+            else {
+              break;
+            }
+          }
  
            // Not a valid IKI vocabulary name.
            if (status != F_true) break;
@@ -431,6 +456,12 @@ extern "C" {
  
      } while (range->start <= range->stop && range->start < buffer->used);
  
+    if (F_status_set_fine(status) == F_complete_not_utf_eos || F_status_set_fine(status) == F_complete_not_utf_stop) {
+      if (!(state.flag & f_iki_state_flag_utf_fail_on_valid_not_e)) {
+        status = F_status_set_fine(status);
+      }
+    }
+
      if (F_status_is_error(status)) {
        data->delimits.used = delimits_used;
  
diff --git a/level_0/f_iki/c/iki.h b/level_0/f_iki/c/iki.h

index ded0ee5d2f42242da20d28f78867cc9dd2081548..a463044de9479f25ac8d7e4bd7076d887eda5f0b 100644 (file)
--- a/level_0/f_iki/c/iki.h
+++ b/level_0/f_iki/c/iki.h
@@ -135,10 +135,10 @@ extern "C" {
   * Calling this more than once on the same buffer range could result in multiple escaping.
   *
   * @param state
- *   A state for handling interrupts during long running operations.
- *   There is no print_error() usage at this time (@todo this should be implemented and supported).
+ *   A state for providing flags and handling interrupts during long running operations.
+ *   There is no print_error().
   *   There is no functions structure.
- *   There is no data structure passed to these functions (@todo the additional parameters could be moved to a custom structure).
+ *   There is no data structure passed to these functions.
   *
   *   When interrupt() returns, only F_interrupt and F_interrupt_not are processed.
   *   Error bit designates an error but must be passed along with F_interrupt.
@@ -157,10 +157,14 @@ extern "C" {
   *   F_none on success and an IKI vocabulary name was found.
   *   F_none_eos on success and an IKI vocabulary name was found and end of string was reached.
   *   F_none_stop on success and an IKI vocabulary name was found and stop point was reached.
+ *   F_complete_not_utf_eos on success but string ended on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is not set.
+ *   F_complete_not_utf_stop on success but stop point reached on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is not set.
   *   F_data_not on success, but there were no IKI vocabulary names found.
   *   F_data_not_eos on success and EOS was reached, but there were no IKI vocabulary names found.
   *   F_data_not_stop on success and stop point was reached, but there were no IKI vocabulary names found.
   *
+ *   F_complete_not_utf_eos (with error bit) on success but string ended on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is set.
+ *   F_complete_not_utf_stop (with error bit) on success but stop point reached on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is set.
   *   F_interrupt (with error bit) if stopping due to an interrupt.
   *   F_memory_not (with error bit) on out of memory.
   *   F_parameter (with error bit) if a parameter is invalid.
diff --git a/level_0/f_iki/c/iki/common.h b/level_0/f_iki/c/iki/common.h

index 38a56ac520fd992cc3923dcb92443088e9ac79bf..704cc89610d755727c98bcb4560acfe49a6e0105 100644 (file)
--- a/level_0/f_iki/c/iki/common.h
+++ b/level_0/f_iki/c/iki/common.h
@@ -17,6 +17,24 @@ extern "C" {
  #endif
  
  /**
+ * State flags associated with iki functions.
+ *
+ * These flags are meant to be bitwise for the 32-bit f_state_t flag property.
+ *
+ * The f_iki_state_flag_none_e is expected to be 0, therefore it must be safe to use 0 directly.
+ *
+ * f_iki_state_flag_*:
+ *   - none:                  No flags are set.
+ *   - utf_fail_on_valid_not: Immediately fail on invalid UTF-8 character (including incomplete).
+ */
+#ifndef _di_f_iki_state_flags_
+  enum {
+    f_iki_state_flag_none_e                  = 0,
+    f_iki_state_flag_utf_fail_on_valid_not_e = 0x1,
+  }; // enum
+#endif // _di_f_iki_state_flags_
+
+/**
   * IKI-specific syntax.
   */
  #ifndef _di_f_iki_syntax_
diff --git a/level_1/fl_iki/c/iki.c b/level_1/fl_iki/c/iki.c

index 08ffd6fc42ecd7bb9c95db48205530984a5a422e..37fef364d615df0e686bc525576dd2b9e15eca48 100644 (file)
--- a/level_1/fl_iki/c/iki.c
+++ b/level_1/fl_iki/c/iki.c
@@ -17,17 +17,9 @@ extern "C" {
        status = f_iki_read(state, buffer, range, data);
        if (F_status_is_error(status)) return status;
  
-      if (status == F_data_not_eos || status == F_data_not_stop) {
-        return status;
-      }
-
-      if (status == F_none_eos || status == F_none_stop) {
-        return status;
-      }
-
      } while (range->start <= range->stop && range->start < buffer->used);
  
-    return F_none;
+    return status;
    }
  #endif // _di_fl_iki_read_
  
diff --git a/level_1/fl_iki/c/iki.h b/level_1/fl_iki/c/iki.h

index 385f2f2fe60c4792b54adf4d893ea6178e7c16a5..4cabc8855f176f92c43f9217526b848055b08dd8 100644 (file)
--- a/level_1/fl_iki/c/iki.h
+++ b/level_1/fl_iki/c/iki.h
@@ -36,14 +36,7 @@ extern "C" {
   * This only finds complete vocabulary names and their respective content.
   *
   * @param state
- *   A state for handling interrupts during long running operations.
- *   There is no print_error() usage at this time (@todo this should be implemented and supported).
- *   There is no functions structure.
- *   There is no data structure passed to these functions (@todo the additional parameters could be moved to a custom structure).
- *
- *   When interrupt() returns, only F_interrupt and F_interrupt_not are processed.
- *   Error bit designates an error but must be passed along with F_interrupt.
- *   All other statuses are ignored.
+ *   A state for providing flags and handling interrupts during long running operations.
   * @param buffer
   *   The string to process.
   * @param range
@@ -58,6 +51,8 @@ extern "C" {
   *   F_none on success and an IKI vocabulary name was found.
   *   F_none_stop on success and an IKI vocabulary name was found and stop point was reached.
   *   F_none_eos on success and an IKI vocabulary name was found and end of string was reached.
+ *   F_complete_not_utf_eos on success and EOS was reached, but at an incomplete UTF-8 sequence.
+ *   F_complete_not_utf_stop on success and stop point was reached, but at an incomplete UTF-8 sequence.
   *   F_data_not_eos on success and EOS was reached, but there were no IKI vocabulary names found.
   *   F_data_not_stop on success and stop point was reached, but there were no IKI vocabulary names found.
   *
diff --git a/level_3/iki_read/c/iki_read.c b/level_3/iki_read/c/iki_read.c

index 0ebb6497053d8dde11df42a8e243b024693ef4b8..8d5f7551a068493315bcc1fd63b0a330a243c009 100644 (file)
--- a/level_3/iki_read/c/iki_read.c
+++ b/level_3/iki_read/c/iki_read.c
@@ -417,18 +417,15 @@ extern "C" {
            if (size_file > iki_read_block_max) {
              file.size_read = iki_read_block_read_large;
              size_block = iki_read_block_max;
-
-            // Pre-allocate entire file buffer plus space for the terminating NULL.
-            f_string_dynamic_increase_by(size_file + (size_block - (size_file % size_block)) + 1, &data.buffer);
            }
            else {
              file.size_read = iki_read_block_read_small;
              size_block = size_file;
-
-            // Pre-allocate entire file buffer plus space for the terminating NULL.
-            f_string_dynamic_increase_by(size_file + 1, &data.buffer);
            }
  
+          // Pre-allocate entire file buffer plus space for the terminating NULL.
+          f_string_dynamic_increase_by(size_file + 1, &data.buffer);
+
            if (F_status_is_error(status)) {
              fll_error_file_print(main->error, F_status_set_fine(status), "f_string_dynamic_resize", F_true, data.argv[main->parameters.remaining.array[i]], f_file_operation_process_s, fll_error_file_type_file_e);
  
diff --git a/level_3/iki_read/c/private-read.c b/level_3/iki_read/c/private-read.c

index 0592b513d5b93edb3d6a984a1c4b738bad619bea..494c28679800494c88980e4cbe420e433ed4aa4d 100644 (file)
--- a/level_3/iki_read/c/private-read.c
+++ b/level_3/iki_read/c/private-read.c
@@ -73,7 +73,7 @@ extern "C" {
  
        status = iki_read_process_at(data, &buffer_range);
  
-      if (status == F_true && buffer_range.start > data->buffer.used || status == F_data_not) {
+      if ((status == F_true && buffer_range.start > data->buffer.used) || status == F_data_not) {
          f_iki_data_delete(&iki_data);
  
          return F_data_not;
author	Kevin Day <thekevinday@gmail.com>
	Wed, 11 May 2022 03:19:54 +0000 (22:19 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Wed, 11 May 2022 04:44:48 +0000 (23:44 -0500)
level_0/f_iki/c/iki.c		patch \| blob \| history
level_0/f_iki/c/iki.h		patch \| blob \| history
level_0/f_iki/c/iki/common.h		patch \| blob \| history
level_1/fl_iki/c/iki.c		patch \| blob \| history
level_1/fl_iki/c/iki.h		patch \| blob \| history
level_3/iki_read/c/iki_read.c		patch \| blob \| history
level_3/iki_read/c/private-read.c		patch \| blob \| history