Bugfix: The f_parse parsing functions do not check buffer.used.

author Kevin Day <kevin@kevux.org>

Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)

committer Kevin Day <kevin@kevux.org>

Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)
author Kevin Day <kevin@kevux.org>
Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)
committer Kevin Day <kevin@kevux.org>
Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)
diff --git a/level_0/f_parse/c/parse.c b/level_0/f_parse/c/parse.c

index 237efb1f779c24a4ede3855e5bea239317c004fa..dd6b81c8c184059ca15224e4076d1e0aa03d6464 100644 (file)
--- a/level_0/f_parse/c/parse.c
+++ b/level_0/f_parse/c/parse.c
@@ -12,6 +12,7 @@ extern "C" {
  
      if (!buffer.used) return F_data_not;
      if (range->start > range->stop) return F_data_not_stop;
+    if (range->start >= buffer.used) return F_data_not_eos;
  
      const unsigned short seek_width = macro_f_utf_char_t_width(seek_to_this);
  
@@ -20,7 +21,7 @@ extern "C" {
      unsigned short width = 0;
      f_array_length_t width_max = 0;
  
-    while (range->start <= range->stop) {
+    while (range->start <= range->stop && range->start < buffer.used) {
  
        width_max = (range->stop - range->start) + 1;
        width = macro_f_utf_byte_width_is(buffer.string[range->start]);
@@ -37,6 +38,7 @@ extern "C" {
        }
        else {
          if (range->start + width > range->stop) return F_status_set_error(F_complete_not_utf_stop);
+        if (range->start + width >= buffer.used) return F_status_set_error(F_complete_not_utf_eos);
  
          if (width == seek_width) {
            f_utf_char_t character = 0;
@@ -50,6 +52,7 @@ extern "C" {
        range->start += width;
  
        if (range->start >= range->stop) return F_none_stop;
+      if (range->start > buffer.used) return F_none_eos;
      } // while
  
      return F_none_eos;
@@ -64,6 +67,7 @@ extern "C" {
  
      if (!buffer.used) return F_data_not;
      if (range->start > range->stop) return F_data_not_stop;
+    if (range->start >= buffer.used) return F_data_not_eos;
  
      f_status_t status = F_none;
      unsigned short width = 0;
@@ -87,11 +91,13 @@ extern "C" {
        }
        else {
          if (range->start + width > range->stop) return F_status_set_error(F_complete_not_utf_stop);
+        if (range->start + width >= buffer.used) return F_status_set_error(F_complete_not_utf_eos);
        }
  
        range->start += width;
  
        if (range->start > range->stop) return F_none_stop;
+      if (range->start >= buffer.used) return F_none_eos;
  
        width_max = (range->stop - range->start) + 1;
      } // while
@@ -110,6 +116,7 @@ extern "C" {
  
      if (!buffer.used) return F_data_not;
      if (range->start > range->stop) return F_data_not_stop;
+    if (range->start >= buffer.used) return F_data_not_eos;
  
      f_status_t status = F_none;
      unsigned short width = 0;
@@ -133,11 +140,13 @@ extern "C" {
        }
        else {
          if (range->start + width > range->stop) return F_status_set_error(F_complete_not_utf_stop);
+        if (range->start + width >= buffer.used) return F_status_set_error(F_complete_not_utf_eos);
        }
  
        range->start += width;
  
        if (range->start > range->stop) return F_none_stop;
+      if (range->start >= buffer.used) return F_none_eos;
  
        width_max = (range->stop - range->start) + 1;
      } // while
@@ -157,6 +166,7 @@ extern "C" {
      if (!buffer.used) return F_data_not;
  
      if (range->start > range->stop) return F_data_not_stop;
+    if (range->start > buffer.used) return F_data_not_eos;
  
      const unsigned short seek_width = macro_f_utf_char_t_width(seek_to_this);
  
@@ -166,7 +176,7 @@ extern "C" {
  
      f_array_length_t width_max = 0;
  
-    while (range->start <= range->stop) {
+    while (range->start <= range->stop && range->start < buffer.used) {
  
        width_max = (range->stop - range->start) + 1;
        width = macro_f_utf_byte_width_is(buffer.string[range->start]);
@@ -183,9 +193,8 @@ extern "C" {
          return F_status_set_error(F_complete_not_utf);
        }
        else {
-        if (range->start + width > range->stop) {
-          return F_status_set_error(F_complete_not_utf_stop);
-        }
+        if (range->start + width > range->stop) return F_status_set_error(F_complete_not_utf_stop);
+        if (range->start + width >= buffer.used) return F_status_set_error(F_complete_not_utf_eos);
  
          if (width == seek_width) {
            f_utf_char_t character = 0;
@@ -197,10 +206,10 @@ extern "C" {
        }
  
        range->start += width;
-
-      if (range->start >= range->stop) return F_none_stop;
      } // while
  
+    if (range->start >= range->stop) return F_none_stop;
+
      return F_none_eos;
    }
  #endif // _di_f_parse_dynamic_seek_to_utf_character_
@@ -217,7 +226,7 @@ extern "C" {
      }
  
      // Skip past all leading NULLs.
-    for (; range->start <= range->stop; ++range->start) {
+    for (; range->start <= range->stop && range->start < buffer.used; ++range->start) {
        if (buffer.string[range->start]) break;
      } // for
  
@@ -229,7 +238,7 @@ extern "C" {
      if (range->stop - range->start < 5) {
  
        // Increment until stop, while taking into consideration UTF-8 character widths.
-      for (; range->start <= range->stop; ) {
+      for (; range->start <= range->stop && range->start < buffer.used; ) {
  
          if (buffer.string[range->start] == f_string_eol_s.string[0]) {
            ++range->start;
@@ -245,7 +254,7 @@ extern "C" {
  
      f_status_t status = F_none;
  
-    for (; range->start <= range->stop; ) {
+    for (; range->start <= range->stop && range->start < buffer.used; ) {
  
        status = f_utf_is_whitespace(buffer.string + range->start, (range->stop - range->start) + 1, F_false);
  
@@ -274,14 +283,12 @@ extern "C" {
        range->start += macro_f_utf_byte_width(buffer.string[range->start]);
      } // for
  
-    if (range->start > range->stop) {
-      return F_found_not;
-    }
+    if (range->start > range->stop || range->start > buffer.used) return F_found_not;
  
      if (range->stop - range->start < 5) {
  
        // Increment until stop, while taking into consideration UTF-8 character widths.
-      for (; range->start <= range->stop; ) {
+      for (; range->start <= range->stop && range->start < buffer.used; ) {
  
          if (buffer.string[range->start] == f_string_eol_s.string[0]) {
            ++range->start;
@@ -297,7 +304,7 @@ extern "C" {
  
      f_array_length_t i = range->start;
  
-    for (; range->start <= range->stop; ) {
+    for (; range->start <= range->stop && range->start < buffer.used; ) {
  
        status = f_utf_is_word(buffer.string + range->start, (range->stop - range->start) + 1, F_true);
        if (F_status_is_error(status)) return status;
@@ -315,10 +322,10 @@ extern "C" {
        range->start += macro_f_utf_byte_width(buffer.string[range->start]);
      } // for
  
-    if (range->start > range->stop || buffer.string[range->start] != f_string_ascii_minus_s.string[0]) {
+    if (range->start > range->stop || range->start >= buffer.used || buffer.string[range->start] != f_string_ascii_minus_s.string[0]) {
  
        // Increment until stop, while taking into consideration UTF-8 character widths.
-      for (; range->start <= range->stop; ) {
+      for (; range->start <= range->stop && range->start < buffer.used; ) {
  
          if (buffer.string[range->start] == f_string_eol_s.string[0]) {
            ++range->start;
@@ -336,13 +343,13 @@ extern "C" {
        f_array_length_t j = 0;
        f_char_t number[5] = { 0, 0, 0, 0, 0 };
  
-      for (++range->start; range->start <= range->stop && j < 4; ++range->start, ++j) {
+      for (++range->start; range->start <= range->stop && range->start < buffer.used && j < 4; ++range->start, ++j) {
  
          // The hexadecimal representing the number may only be ASCII.
          if (macro_f_utf_byte_width_is(buffer.string[range->start])) {
  
            // Increment until stop, while taking into consideration UTF-8 character widths.
-          for (; range->start <= range->stop; ) {
+          for (; range->start <= range->stop && range->start < buffer.used; ) {
  
              if (buffer.string[range->start] == f_string_eol_s.string[0]) {
                ++range->start;
@@ -378,7 +385,7 @@ extern "C" {
        else {
  
          // Increment until stop, while taking into consideration UTF-8 character widths.
-        for (; range->start <= range->stop; ) {
+        for (; range->start <= range->stop && range->start < buffer.used; ) {
  
            if (buffer.string[range->start] == f_string_eol_s.string[0]) {
              ++range->start;
@@ -398,7 +405,7 @@ extern "C" {
      }
  
      // Skip past all NULLs.
-    for (; range->start <= range->stop; ++range->start) {
+    for (; range->start <= range->stop && range->start < buffer.used; ++range->start) {
        if (buffer.string[range->start]) break;
      } // for
  
@@ -417,7 +424,7 @@ extern "C" {
        if (status == F_false) {
  
          // Increment until stop, while taking into consideration UTF-8 character widths.
-        for (; range->start <= range->stop; ) {
+        for (; range->start <= range->stop && range->start < buffer.used; ) {
  
            if (buffer.string[range->start] == f_string_eol_s.string[0]) {
              ++range->start;
@@ -441,7 +448,7 @@ extern "C" {
      }
  
      if (id) {
-      for (f_array_length_t j = i, i = 0; j <= range->stop; ++j) {
+      for (f_array_length_t j = i, i = 0; j <= range->stop && j < buffer.used; ++j) {
  
          if (!buffer.string[j]) continue;
          if (buffer.string[j] == f_string_ascii_minus_s.string[0]) break;
@@ -466,9 +473,7 @@ extern "C" {
        if (!range) return F_status_set_error(F_parameter);
      #endif // _di_level_0_parameter_checking_
  
-    if (range->start > range->stop) {
-      return F_data_not_stop;
-    }
+    if (range->start > range->stop) return F_data_not_stop;
  
      const unsigned short seek_width = macro_f_utf_char_t_width(seek_to);
  
diff --git a/level_0/f_parse/c/parse.h b/level_0/f_parse/c/parse.h

index 2ecd78a3bff86fa8c4e75f68e1c265a3f9c3771e..41fff433b1bc49a125447035e6f0f2c78b226cf8 100644 (file)
--- a/level_0/f_parse/c/parse.h
+++ b/level_0/f_parse/c/parse.h
@@ -40,12 +40,15 @@ extern "C" {
   * @return
   *   F_none on success.
   *   F_none_eol on success, but stopped at EOL.
+ *   F_none_eos on success, but stopped at end of buffer.
   *   F_none_stop on success, but stopped at end of range.
   *   F_data_not on success, but there was no string data to seek.
+ *   F_data_not_eos on success, but the range.start >= buffer.used.
   *   F_data_not_stop on success, but the range.start > range.stop.
   *
   *   F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
   *   F_complete_not_utf_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   F_complete_not_utf_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   *   F_parameter (with error bit) if a parameter is invalid.
   *   F_utf_not (with error bit) if character is an invalid UTF-8 character.
   *
@@ -70,12 +73,15 @@ extern "C" {
   * @return
   *   F_none on success.
   *   F_none_eol on success, but stopped at EOL.
+ *   F_none_eos on success, but stopped at end of buffer.
   *   F_none_stop on success, but stopped at end of range.
   *   F_data_not on success, but there was no string data to seek.
+ *   F_data_not_eos on success, but the range.start >= buffer.used.
   *   F_data_not_stop on success, but the range.start > range.stop.
   *
   *   F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
   *   F_complete_not_utf_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   F_complete_not_utf_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   *   F_memory_not (with error bit) on out of memory.
   *   F_parameter (with error bit) if a parameter is invalid.
   *
@@ -100,12 +106,15 @@ extern "C" {
   * @return
   *   F_none on success.
   *   F_none_eol on success, but stopped at EOL.
+ *   F_none_eos on success, but stopped at end of buffer.
   *   F_none_stop on success, but stopped at end of range.
   *   F_data_not on success, but there was no string data to seek.
+ *   F_data_not_eos on success, but the range.start >= buffer.used.
   *   F_data_not_stop on success, but the range.start > range.stop.
   *
   *   F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
   *   F_complete_not_utf_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   F_complete_not_utf_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   *   F_memory_not (with error bit) on out of memory.
   *   F_parameter (with error bit) if a parameter is invalid.
   *
@@ -130,12 +139,16 @@ extern "C" {
   *
   * @return
   *   F_none on success.
+ *   F_none_eol on success, but stopped at EOL.
+ *   F_none_eos on success, but stopped at end of buffer.
   *   F_none_stop on success, but stopped at end of range.
   *   F_data_not on success, but there was no string data to seek.
+ *   F_data_not_eos on success, but the range.start >= buffer.used.
   *   F_data_not_stop on success, but the range.start > range.stop.
   *
   *   F_complete_not_utf (with error bit) if character is an incomplete UTF-8 fragment.
   *   F_complete_not_utf_stop (with error bit) if the stop location is reached before the complete UTF-8 character can be processed.
+ *   F_complete_not_utf_eos (with error bit) if the end of buffer is reached before the complete UTF-8 character can be processed.
   *   F_parameter (with error bit) if a parameter is invalid.
   *   F_utf_not (with error bit) if character is an invalid UTF-8 character.
   *
@@ -179,6 +192,8 @@ extern "C" {
   *
   * @return
   *   F_data_not if length is 0.
+ *   F_data_not_eos on success, but the range.start >= buffer.used.
+ *   F_data_not_stop on success, but the range.start > range.stop.
   *   F_found if the buffer does represent a valid FLL Identifier.
   *   F_found_not if the buffer does not represent a valid FLL Identifier.
   *
author	Kevin Day <kevin@kevux.org>
	Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)
committer	Kevin Day <kevin@kevux.org>
	Mon, 27 Mar 2023 12:35:06 +0000 (07:35 -0500)
level_0/f_parse/c/parse.c		patch \| blob \| history
level_0/f_parse/c/parse.h		patch \| blob \| history