Bugfix: UTF-8 functions fail to properly handle ASCII.

author Kevin Day <thekevinday@gmail.com>

Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)

committer Kevin Day <thekevinday@gmail.com>

Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)
author Kevin Day <thekevinday@gmail.com>
Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)
committer Kevin Day <thekevinday@gmail.com>
Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)
diff --git a/level_0/f_utf/c/private-utf.c b/level_0/f_utf/c/private-utf.c

index 98d76fd770dbbb590275402ca9ccbe55dfb38b80..50b09085e3b874d9398b5f7c23a0d27ab5a39185 100644 (file)
--- a/level_0/f_utf/c/private-utf.c
+++ b/level_0/f_utf/c/private-utf.c
@@ -187,7 +187,7 @@ extern "C" {
  #if !defined(_di_f_utf_character_is_ascii_) || !defined(_di_f_utf_is_ascii_)
    f_status_t private_f_utf_character_is_ascii(const f_utf_character_t character, const uint8_t width) {
  
-    if (!width) {
+    if (width < 2) {
        const uint8_t byte_first = macro_f_utf_character_t_to_char_1(character);
  
        if (byte_first >= 0x00 && byte_first <= 0x7f) {
@@ -1975,7 +1975,7 @@ extern "C" {
      // reduce the number of checks by grouping checks by byte.
      const uint8_t byte_first = macro_f_utf_character_t_to_char_1(character);
  
-    if (!width) {
+    if (width < 2) {
        if (byte_first >= 0x00 && byte_first <= 0x7f) {
          return F_true;
        }
diff --git a/level_0/f_utf/c/private-utf.h b/level_0/f_utf/c/private-utf.h

index 3687553e9df41a81c9642ad28a052c557d12a6f9..f2e0c3e1ba3844043d677383a75bfd4a8ed60e01 100644 (file)
--- a/level_0/f_utf/c/private-utf.h
+++ b/level_0/f_utf/c/private-utf.h
@@ -149,6 +149,7 @@ extern "C" {
   *   The character to validate.
   * @param width
   *   The number of bytes repesenting the character width.
+ *   A width of 0 or 1 is treated as ASCII (width 1).
   *
   * @return
   *   F_true if a UTF-8 control picture character.
@@ -425,6 +426,7 @@ extern "C" {
   *   The character to validate.
   * @param width
   *   The number of bytes repesenting the character width.
+ *   A width of 0 or 1 is treated as ASCII (width 1).
   *
   * @return
   *   F_true if a UTF-8 character.
diff --git a/level_3/byte_dump/c/byte_dump.h b/level_3/byte_dump/c/byte_dump.h

index 2002fa4d6c8d07422985415b3c8008d37ab9f93e..5c2f7389f90af5d5a78a7292481b711511e3c97d 100644 (file)
--- a/level_3/byte_dump/c/byte_dump.h
+++ b/level_3/byte_dump/c/byte_dump.h
@@ -5,7 +5,7 @@
   * API Version: 0.5
   * Licenses: lgplv2.1
   *
- * This is intendend to support Unicode 12.1.
+ * This is intended to support Unicode 14.0.
   *
   * When using "text" mode, this program attempts to translate UTF-8 sequences such that certain codes don't cause printing problems.
   * There may be cases where there are unknown codes that get printed and the invalid UTF-8 marker may be displayed not by this program but instead by the shell or some other program.
author	Kevin Day <thekevinday@gmail.com>
	Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)
committer	Kevin Day <thekevinday@gmail.com>
	Fri, 5 Nov 2021 01:53:17 +0000 (20:53 -0500)
level_0/f_utf/c/private-utf.c		patch \| blob \| history
level_0/f_utf/c/private-utf.h		patch \| blob \| history
level_3/byte_dump/c/byte_dump.h		patch \| blob \| history