Kevux Git Server - fll/commitdiff
Bugfix: Last character of file after conversion from code point is not printed by...
author Kevin Day <thekevinday@gmail.com>
Sat, 11 Jun 2022 19:09:39 +0000 (14:09 -0500)
committer Kevin Day <thekevinday@gmail.com>
Sat, 11 Jun 2022 19:09:39 +0000 (14:09 -0500)
The algorithm doesn't print a character until it knows that the character is complete.
There is no check for when the end of the file is reached.
This results in the last character not being printed, even if its code is complete.

Be sure that utf8_detect_codepoint() returns the status, rather than always returning F_none, under certain circumstances.
Update documentation about return value in utf8_detect_codepoint().
Initialize character.used to 0 rather than 4 (because it has no data!).
For better practice, compare using >= rather than ==.
Remove the unnecessary i = 0 assignment.

level_3/utf8/c/private-utf8_codepoint.c
level_3/utf8/c/private-utf8_codepoint.h

index 30cbe37a495e23b22b4e252ba40dad697a641eba..fcf8427482e8eb221dda23fc001d301a4cd79ed7 100644 (file)
@@ -237,6 +237,9 @@ extern "C" {
         if (status == F_true) {
           status = F_space;
         }
+        else {
+          status = F_none;
+        }
       }
     }
 
@@ -300,7 +303,7 @@ extern "C" {
       }
     }
 
-    return F_none;
+    return status;
   }
 #endif // _di_utf8_detect_codepoint_
 
@@ -316,12 +319,37 @@ extern "C" {
     f_array_length_t j = 0;
 
     f_char_t block[5] = { 0, 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 4);
+    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 0);
 
     do {
       status = f_file_read_block(file, &data->buffer);
 
-      if (status == F_none_eof && !data->buffer.used) break;
+      if (status == F_none_eof && !data->buffer.used) {
+
+        // Handle complete character, which must be explicitly set to end in this situation.
+        if (mode_codepoint == utf8_codepoint_mode_number_e || mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          if (mode_codepoint == utf8_codepoint_mode_number_e) {
+            mode_codepoint = utf8_codepoint_mode_end_e;
+
+            status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          }
+          else if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+            mode_codepoint = utf8_codepoint_mode_raw_end_e;
+
+            status = utf8_convert_raw(data, character, &mode_codepoint);
+
+            // Raw mode represents an invalid Unicode sequence.
+            valid = F_false;
+          }
+
+          j = 0;
+          next = F_true;
+          status = F_none_eof;
+          mode_codepoint = utf8_codepoint_mode_ready_e;
+        }
+
+        break;
+      }
 
       for (i = 0; F_status_is_fine(status) && i < data->buffer.used; ) {
 
@@ -349,7 +377,7 @@ extern "C" {
           character.string[j] = data->buffer.string[i];
         } // for
 
-        if (j == character.used) {
+        if (j >= character.used) {
           if (data->mode & utf8_mode_from_bytesequence_d) {
             status = utf8_convert_bytesequence(data, character);
           }
@@ -378,7 +406,6 @@ extern "C" {
         }
       } // for
 
-      i = 0;
       data->buffer.used = 0;
 
     } while (F_status_is_fine(status) && status != F_interrupt);
index 69317a54a81a1082cb2a9fe534fc99a3563bf127..295402216e05776e41fd569ff8b7a2bf944efabd 100644 (file)
@@ -77,6 +77,7 @@ extern "C" {
  * @return
  *   F_none on success.
  *   F_next on success, but should not be processed (it is white space or NULL).
+ *   F_space on success, but the character is whitespace.
  *
  *   Errors (with error bit) from: f_utf_is_whitespace()
  */