The zero-width calculations should include combining characters and most control characters, but currently do not.
The combining characters and most of the control characters are now considered zero-width.
return F_true;
}
+ // Hanifi Rohingya: U+10D24 to U+10D27 (UTF-8: F0 90 B4 A4 to F0 90 B4 A7).
+ if (sequence >= 0xf090b4a4 && sequence <= 0xf090b4a7) {
+ return F_true;
+ }
+
+ // Yezidi: U+10EAB to U+10EAD.
+ if (sequence >= 0xf090baab && sequence <= 0xf090baad) {
+ return F_true;
+ }
+
// Arabic Extended-C: U+10EFD to U+10EFF.
if (sequence >= 0xf090bbbd && sequence <= 0xf090bbbf) {
return F_true;
#if !defined(_di_f_utf_character_is_zero_width_) || !defined(_di_f_utf_is_zero_width_)
f_status_t private_f_utf_character_is_zero_width(const f_utf_char_t sequence) {
- if (macro_f_utf_char_t_to_char_1(sequence) == 0xe1) {
-
- // Mongolian: U+180E.
- if (sequence == 0xe1a08e00) {
- return F_true;
- }
- }
- else if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
-
- // General Punctuation: U+200B, U+200C, U+200D, U+2060.
- if (sequence == 0xe2808b00 || sequence == 0xe2808c00 || sequence == 0xe2808d00 || sequence == 0xe281a000) {
- return F_true;
+ if (macro_f_utf_char_t_width_is(sequence) == 3) {
+ if (macro_f_utf_char_t_to_char_1(sequence) == 0xe2) {
+
+ // General Punctuation: U+2028 to U+2029 (UTF-8: E2 80 A8 to E2 80 A9, stored left-aligned with a trailing zero byte).
+ if (sequence >= 0xe280a800 && sequence <= 0xe280a900) {
+ return F_true;
+ }
+
+ // General Punctuation: U+2066 to U+2069.
+ if (sequence >= 0xe281a600 && sequence <= 0xe281a900) {
+ return F_true;
+ }
}
- }
- else if (macro_f_utf_char_t_to_char_1(sequence) == 0xef) {
-
- // Arabic Presentation Forms-B: U+FEFF.
- if (sequence == 0xefbbbf00) {
- return F_true;
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0xef) {
+ if (macro_f_utf_char_t_to_char_2(sequence) == 0xbf) {
+
+ // Unassigned and reserved for future use: U+FFF0 to U+FFF8 (these are assumed to be zero-width).
+ // NOTE(review): the upper bound 0xefbfb900 encodes U+FFF9 (EF BF B9), one past the U+FFF8 stated above; confirm whether U+FFF9 is meant to be included.
+ if (sequence >= 0xefbfb000 && sequence <= 0xefbfb900) {
+ return F_true;
+ }
+ }
}
}
if (F_status_is_error(status)) return status;
}
+ // Control and combining characters are zero-width.
+ if (private_f_utf_character_is_control(utf)) return F_true;
+ if (private_f_utf_character_is_combining(utf)) return F_true;
+
return private_f_utf_character_is_zero_width(utf);
}
// These control characters are considered zero-width spaces.
- if (*sequence >= 0x00 && *sequence <= 0x08) {
+ if (*sequence >= 0x00 && *sequence < 0x09) {
return F_true;
}
- else if (*sequence >= 0x0c && *sequence <= 0x1f) {
+ else if (*sequence > 0x0b && *sequence < 0x20) {
return F_true;
}
else if (*sequence == 0x7f) {
return F_status_set_error(F_utf_fragment);
}
+ // Control and combining characters are zero-width.
+ if (private_f_utf_character_is_control(sequence)) return F_true;
+ if (private_f_utf_character_is_combining(sequence)) return F_true;
+
return private_f_utf_character_is_zero_width(sequence);
}
- const uint8_t ascii = macro_f_utf_char_t_to_char_1(sequence);
-
// These control characters are considered zero-width spaces.
- if (ascii >= 0x00 && ascii <= 0x08) {
- return F_true;
- }
- else if (ascii == 0x0a) {
+ if (macro_f_utf_char_t_to_char_1(sequence) >= 0x00 && macro_f_utf_char_t_to_char_1(sequence) < 0x09) {
return F_true;
}
- else if (ascii >= 0x0c && ascii <= 0x1f) {
+ else if (macro_f_utf_char_t_to_char_1(sequence) > 0x0b && macro_f_utf_char_t_to_char_1(sequence) < 0x20) {
return F_true;
}
- else if (ascii == 0x7f) {
+ else if (macro_f_utf_char_t_to_char_1(sequence) == 0x7f) {
return F_true;
}