From: Kevin Day <thekevinday@gmail.com>
Date: Sat, 11 Jun 2022 19:09:39 +0000 (-0500)
Subject: Bugfix: Last character of file after conversion from code point is not printed by utf8 program.
X-Git-Tag: 0.5.10~51
X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=a4e63c1483a32356f32174c9b396fcbd5e554f7d;p=fll

Bugfix: Last character of file after conversion from code point is not printed by utf8 program.

The algorithm doesn't print a character until it knows that the character is complete.
There is no check for when the end of the file is reached.
As a result, the last character is never printed, even when its code point is complete.

Return the computed status at the end of utf8_detect_codepoint() rather than unconditionally returning F_none.
Update the documentation of utf8_detect_codepoint() to describe the F_space return value.
Initialize character.used to 0 rather than 4 (because it holds no data yet!).
As a defensive practice, compare j against character.used using >= rather than ==.
Remove the unnecessary i = 0 assignment (the for loop already resets i to 0).
---

diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c
index 30cbe37a4..fcf842748 100644
--- a/level_3/utf8/c/private-utf8_codepoint.c
+++ b/level_3/utf8/c/private-utf8_codepoint.c
@@ -237,6 +237,9 @@ extern "C" {
         if (status == F_true) {
           status = F_space;
         }
+        else {
+          status = F_none;
+        }
       }
     }
 
@@ -300,7 +303,7 @@ extern "C" {
       }
     }
 
-    return F_none;
+    return status;
   }
 #endif // _di_utf8_detect_codepoint_
 
@@ -316,12 +319,37 @@ extern "C" {
     f_array_length_t j = 0;
 
     f_char_t block[5] = { 0, 0, 0, 0, 0 };
-    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 4);
+    f_string_static_t character = macro_f_string_static_t_initialize(block, 0, 0);
 
     do {
       status = f_file_read_block(file, &data->buffer);
 
-      if (status == F_none_eof && !data->buffer.used) break;
+      if (status == F_none_eof && !data->buffer.used) {
+
+        // Handle complete character, which must be explicitly set to end in this situation.
+        if (mode_codepoint == utf8_codepoint_mode_number_e || mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+          if (mode_codepoint == utf8_codepoint_mode_number_e) {
+            mode_codepoint = utf8_codepoint_mode_end_e;
+
+            status = utf8_convert_codepoint(data, character, &mode_codepoint);
+          }
+          else if (mode_codepoint == utf8_codepoint_mode_raw_number_e) {
+            mode_codepoint = utf8_codepoint_mode_raw_end_e;
+
+            status = utf8_convert_raw(data, character, &mode_codepoint);
+
+            // Raw mode represents an invalid Unicode sequence.
+            valid = F_false;
+          }
+
+          j = 0;
+          next = F_true;
+          status = F_none_eof;
+          mode_codepoint = utf8_codepoint_mode_ready_e;
+        }
+
+        break;
+      }
 
       for (i = 0; F_status_is_fine(status) && i < data->buffer.used; ) {
 
@@ -349,7 +377,7 @@ extern "C" {
           character.string[j] = data->buffer.string[i];
         } // for
 
-        if (j == character.used) {
+        if (j >= character.used) {
           if (data->mode & utf8_mode_from_bytesequence_d) {
             status = utf8_convert_bytesequence(data, character);
           }
@@ -378,7 +406,6 @@ extern "C" {
         }
       } // for
 
-      i = 0;
       data->buffer.used = 0;
 
     } while (F_status_is_fine(status) && status != F_interrupt);
diff --git a/level_3/utf8/c/private-utf8_codepoint.h b/level_3/utf8/c/private-utf8_codepoint.h
index 69317a54a..295402216 100644
--- a/level_3/utf8/c/private-utf8_codepoint.h
+++ b/level_3/utf8/c/private-utf8_codepoint.h
@@ -77,6 +77,7 @@ extern "C" {
  * @return
  *   F_none on success.
  *   F_next on success, but should not be processed (it is white space or NULL).
+ *   F_space on success, but the character is whitespace.
  *
  *   Errors (with error bit) from: f_utf_is_whitespace()
  */