I couldn't easily find which Unicode version I am trying to support, so I added a unicode.txt document.
The initial work was done for Unicode 12.1 and later switched to 14.0.
I was still in the initial stages of learning Unicode and so I expect there to be notable incompleteness or incorrectness.
With the release of Unicode 15.0 I am able to determine a distinct set of changes and I have applied the changes.
Add a private_inline_f_utf_character_handle_digit_offset() inline function to simplify some of the redundant code.
--- /dev/null
+# fss-0002
+#
+# license: cc-by-sa-4.0
+#
+
+Unicode:
+ This project attempts to support Unicode 15.0.
+
+ Given the scale of Unicode in conjunction with the knowledge and availability of the developers behind this project, this goal is likely not met.
+ The intent is to reach compatibility, therefore fixes, improvements, and corrections will continue to be made as they are discovered and time is available.
return F_true;
}
- // Kannada: U+0CE2, U+0CE3.
- if (sequence == 0xe0b3a200 || sequence == 0xe0b3a300) {
+ // Kannada: U+0CE2, U+0CE3, U+0CF3.
+ if (sequence == 0xe0b3a200 || sequence == 0xe0b3a300 || sequence == 0xe0b3b300) {
return F_true;
}
return F_true;
}
- // Lao: U+0EBB, U+0EBC.
- if (sequence == 0xe0babb00 || sequence == 0xe0babc00) {
+ // Lao: U+0EBB, U+0EBC, U+0ECE.
+ if (sequence == 0xe0babb00 || sequence == 0xe0babc00 || sequence == 0xe0bb8e00) {
return F_true;
}
return F_true;
}
+ // Arabic Extended-C: U+10EFD to U+10EFF.
+ if (sequence >= 0xf090bbbd && sequence <= 0xf090bbbf) {
+ return F_true;
+ }
+
// Brahmi: U+11001.
if (sequence == 0xf0918081) {
return F_true;
return F_true;
}
+ // Kawi: U+11F00 to U+11F03.
+ if (sequence >= 0xf091bc80 && sequence <= 0xf091bc83) {
+ return F_true;
+ }
+
+ // Kawi: U+11F34 to U+11F3A.
+ if (sequence >= 0xf091bcb4 && sequence <= 0xf091bcba) {
+ return F_true;
+ }
+
+ // Kawi: U+11F3E to U+11F42.
+ if (sequence >= 0xf091bcbe && sequence <= 0xf091bd82) {
+ return F_true;
+ }
+
// Khojki: U+1122F to U+11231.
if (sequence >= 0xf09188af && sequence <= 0xf09188b1) {
return F_true;
return F_true;
}
+ // Cyrillic Extended-D: U+1E08F.
+ if (sequence == 0xf09e828f) {
+ return F_true;
+ }
+
+ // Nag Mundari: U+1E4EC to U+1E4EF.
+ if (sequence >= 0xf09e93ac && sequence <= 0xf09e93af) {
+ return F_true;
+ }
+
// Mende Kikakui: U+1E8D0 to U+1E8D6.
if (sequence >= 0xf09ea390 && sequence <= 0xf09ea396) {
return F_true;
return F_true;
}
- // Egyptian Hieroglyphics: U+13430 to U+13438.
- if (sequence >= 0xf09390b0 && sequence <= 0xf09390b8) {
+ // Egyptian Hieroglyph Format Controls: U+13430 to U+13455.
+ if (sequence >= 0xf09390b0 && sequence <= 0xf0939195) {
return F_true;
}
return F_true;
}
- // Egyptian Hieroglyphics: U+13430 to U+13438.
- if (sequence >= 0xf09390b0 && sequence <= 0xf09390b8) {
+ // Egyptian Hieroglyph Format Controls: U+13430 to U+13455.
+ if (sequence >= 0xf09390b0 && sequence <= 0xf0939195) {
return F_true;
}
/**
* Inline helper function to reduce amount of code typed.
*
+ * Given the value, this will conditionally convert the range into an appropriate base-10 integer.
+ *
+ * This does not handle non-decimal values (non-base-10).
+ *
+ * This applies an offset with the intent of being used for producing values greater than 9 (such as 10 through 19).
+ *
+ * The distance between sequence and start is taken from the last byte of the sequence (selected by the sequence width) and mapped onto the ASCII digits starting at 0x30.
+ * The resulting ASCII character is then passed to private_f_utf_character_is_digit_for_ascii(), whose result decides whether the offset is applied.
+ *
+ * When value is NULL, no conversion or validation is performed and F_true is returned.
+ *
+ * @param sequence
+ * The character sequence to process.
+ * @param start
+ * An inclusive start range.
+ * The base-10 stop range is calculated from this.
+ * @param offset
+ * An offset to add to the calculated base-10 value.
+ * If the calculated value is 9 and offset is 10, then 19 is stored in value.
+ * @param value
+ * The value to update, if non-NULL.
+ *
+ * @return
+ * F_true for valid digit in the requested range.
+ * F_true if value is NULL.
+ * F_false, otherwise.
+ */
+static inline f_status_t private_inline_f_utf_character_handle_digit_offset(const f_utf_char_t sequence, const f_utf_char_t start, const uint8_t offset, uint64_t * const value) {
+
+ if (value) {
+ f_char_t ascii = 0x30;
+
+ if (macro_f_utf_char_t_width(sequence) == 2) {
+ ascii += (f_char_t) macro_f_utf_char_t_to_char_2(sequence - start);
+ }
+ else if (macro_f_utf_char_t_width(sequence) == 3) {
+ ascii += (f_char_t) macro_f_utf_char_t_to_char_3(sequence - start);
+ }
+ else if (macro_f_utf_char_t_width(sequence) == 4) {
+ ascii += (f_char_t) macro_f_utf_char_t_to_char_4(sequence - start);
+ }
+
+ if (private_f_utf_character_is_digit_for_ascii(ascii, value) == F_true) {
+ *value += offset;
+
+ return F_true;
+ }
+
+ return F_false;
+ }
+
+ return F_true;
+}
+
+/**
+ * Inline helper function to reduce amount of code typed.
+ *
* Given the value, this will conditionally convert the range into an appropriate base-10 integer from 1 to 9.
*
* This does not handle non-decimal values (non-base-10).
return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe291a000, value);
}
- // Enclosed Alphanumerics: U+2469.
- if (sequence == 0xe291a900) {
- if (value) {
- *value = 10;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246A.
- if (sequence == 0xe291aa00) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246B.
- if (sequence == 0xe291ab00) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246C.
- if (sequence == 0xe291ac00) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246D.
- if (sequence == 0xe291ad00) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246E.
- if (sequence == 0xe291ae00) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+246F.
- if (sequence == 0xe291af00) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2470.
- if (sequence == 0xe291b000) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2471.
- if (sequence == 0xe291b100) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2472.
- if (sequence == 0xe291b200) {
- if (value) {
- *value = 19;
- }
-
- return F_true;
+ // Enclosed Alphanumerics: U+2469 to U+2472.
+ if (sequence <= 0xe291b200) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe291a900, 10, value);
}
// Enclosed Alphanumerics: U+2473.
return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe291b400, value);
}
- // Enclosed Alphanumerics: U+247D.
- if (sequence == 0xe291bd00) {
- if (value) {
- *value = 10;
- }
-
- return F_true;
+ // Enclosed Alphanumerics: U+247D to U+247F.
+ if (sequence <= 0xe291bf00) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe291bd00, 10, value);
}
- // Enclosed Alphanumerics: U+247E.
- if (sequence == 0xe291be00) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
+ // Enclosed Alphanumerics: U+2480 to U+2486.
+ if (sequence <= 0xe2928600) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe2928000, 13, value);
}
- // Enclosed Alphanumerics: U+247F.
- if (sequence == 0xe291bf00) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2480.
- if (sequence == 0xe2928000) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2481.
- if (sequence == 0xe2928100) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2482.
- if (sequence == 0xe2928200) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2483.
- if (sequence == 0xe2928300) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2484.
- if (sequence == 0xe2928400) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2485.
- if (sequence == 0xe2928500) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2486.
- if (sequence == 0xe2928600) {
+ // Enclosed Alphanumerics: U+2487.
+ if (sequence == 0xe2928700) {
if (value) {
- *value = 19;
+ *value = 20;
}
return F_true;
return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xe2928800, value);
}
- // Enclosed Alphanumerics: U+2491.
- if (sequence == 0xe2929100) {
- if (value) {
- *value = 10;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2492.
- if (sequence == 0xe2929200) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2493.
- if (sequence == 0xe2929300) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2494.
- if (sequence == 0xe2929400) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2495.
- if (sequence == 0xe2929500) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2496.
- if (sequence == 0xe2929600) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2497.
- if (sequence == 0xe2929700) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2498.
- if (sequence == 0xe2929800) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+2499.
- if (sequence == 0xe2929900) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+249A.
- if (sequence == 0xe2929a00) {
- if (value) {
- *value = 19;
- }
-
- return F_true;
+ // Enclosed Alphanumerics: U+2491 to U+249A.
+ if (sequence <= 0xe2929a00) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe2929100, 10, value);
}
// Enclosed Alphanumerics: U+249B.
return F_true;
}
- // Enclosed Alphanumerics: U+24EB.
- if (sequence == 0xe293ab00) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24EC.
- if (sequence == 0xe293ac00) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24ED.
- if (sequence == 0xe293ad00) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24EE.
- if (sequence == 0xe293ae00) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24EF.
- if (sequence == 0xe293af00) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24F0.
- if (sequence == 0xe293b000) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24F1.
- if (sequence == 0xe293b100) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24F2.
- if (sequence == 0xe293b200) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Enclosed Alphanumerics: U+24F3.
- if (sequence == 0xe293b300) {
- if (value) {
- *value = 19;
- }
-
- return F_true;
+ // Enclosed Alphanumerics: U+24EB to U+24F3.
+ if (sequence <= 0xe293b300) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xe293aa00, 10, value);
}
// Enclosed Alphanumerics: U+24F4.
return F_true;
}
+
+ // Kawi: U+11F50 to U+11F59.
+ if (sequence >= 0xf091bd90 && sequence <= 0xf091bd99) {
+ return private_inline_f_utf_character_handle_digit(sequence, 0xf091bd90, value);
+ }
}
else if (macro_f_utf_char_t_to_char_2(sequence) == 0x92) {
return private_inline_f_utf_character_handle_digit(sequence, 0xf096ba80, value);
}
- // Medefaidrin: U+16E8A.
- if (sequence == 0xf096ba8a) {
- if (value) {
- *value = 10;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E8B.
- if (sequence == 0xf096ba8b) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E8C.
- if (sequence == 0xf096ba8c) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E8D.
- if (sequence == 0xf096ba8d) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E8E.
- if (sequence == 0xf096ba8e) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E8F.
- if (sequence == 0xf096ba8f) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E90.
- if (sequence == 0xf096ba90) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E91.
- if (sequence == 0xf096ba91) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E92.
- if (sequence == 0xf096ba92) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Medefaidrin: U+16E93.
- if (sequence == 0xf096ba93) {
- if (value) {
- *value = 19;
- }
-
- return F_true;
+ // Medefaidrin: U+16E8A to U+16E93.
+ if (sequence <= 0xf096ba93) {
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf096ba8a, 10, value);
}
// Medefaidrin: U+16E94 to U+16E96.
}
else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) {
+ // Kaktovik Numerals: U+1D2C0 to U+1D2D3.
+ if (sequence >= 0xf09d8b80 && sequence <= 0xf09d8b93) {
+
+ // Kaktovik Numerals: U+1D2C0 to U+1D2C9.
+ if (sequence <= 0xf09d8b89) {
+ return private_inline_f_utf_character_handle_digit(sequence, 0xf09d8b80, value);
+ }
+
+ // Kaktovik Numerals: U+1D2CA to U+1D2D3.
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf09d8b8a, 10, value);
+ }
+
// Mayan Numerals: U+1D2E0 to U+1D2F3.
if (sequence >= 0xf09d8ba0 && sequence <= 0xf09d8bb3) {
return private_inline_f_utf_character_handle_digit(sequence, 0xf09d8ba0, value);
}
- // Mayan Numerals: U+1D2EA.
- if (sequence == 0xf09d8baa) {
- if (value) {
- *value = 10;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2EB.
- if (sequence == 0xf09d8bab) {
- if (value) {
- *value = 11;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2EC.
- if (sequence == 0xf09d8bac) {
- if (value) {
- *value = 12;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2ED.
- if (sequence == 0xf09d8bad) {
- if (value) {
- *value = 13;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2EE.
- if (sequence == 0xf09d8bae) {
- if (value) {
- *value = 14;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2EF.
- if (sequence == 0xf09d8baf) {
- if (value) {
- *value = 15;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2F0.
- if (sequence == 0xf09d8bb0) {
- if (value) {
- *value = 16;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2F1.
- if (sequence == 0xf09d8bb1) {
- if (value) {
- *value = 17;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2F2.
- if (sequence == 0xf09d8bb2) {
- if (value) {
- *value = 18;
- }
-
- return F_true;
- }
-
- // Mayan Numerals: U+1D2F3.
- if (value) {
- *value = 19;
- }
-
- return F_true;
+ // Mayan Numerals: U+1D2EA to U+1D2F3.
+ return private_inline_f_utf_character_handle_digit_offset(sequence, 0xf09d8baa, 10, value);
}
// Counting Rod Numerals: U+1D360 to U+1D378.
}
else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9e) {
+ // Nag Mundari: U+1E4F0 to U+1E4F9.
+ if (sequence >= 0xf09e93b0 && sequence <= 0xf09e93b9) {
+ return private_inline_f_utf_character_handle_digit(sequence, 0xf09e93b0, value);
+ }
+
// Mende Kikakui: U+1E8C7 to U+1E8CF.
if (sequence >= 0xf09ea387 && sequence <= 0xf09ea38f) {
return private_inline_f_utf_character_handle_digit_from_one(sequence, 0xf09ea387, value);
return F_true;
}
- // Transport and Map Symbols: U+1F6DD to U+1F6DF.
- if (sequence >= 0xf09f9b9d && sequence <= 0xf09f9b9f) {
+ // Transport and Map Symbols: U+1F6DC to U+1F6DF.
+ if (sequence >= 0xf09f9b9c && sequence <= 0xf09f9b9f) {
return F_true;
}
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA74.
- if (sequence >= 0xf09fa9b0 && sequence <= 0xf09fa9b4) {
+ // Symbols and Pictographs Extended-A: U+1FA70 to U+1FA7C.
+ if (sequence >= 0xf09fa9b0 && sequence <= 0xf09fa9bc) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FA78 to U+1FA7C.
- if (sequence >= 0xf09fa9b8 && sequence <= 0xf09fa9bc) {
+ // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA88.
+ if (sequence >= 0xf09faa80 && sequence <= 0xf09faa88) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FA80 to U+1FA86.
- if (sequence >= 0xf09faa80 && sequence <= 0xf09faa86) {
+ // Symbols and Pictographs Extended-A: U+1FA90 to U+1FABD.
+ if (sequence >= 0xf09faa90 && sequence <= 0xf09faabd) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FA90 to U+1FAAC.
- if (sequence >= 0xf09faa90 && sequence <= 0xf09faaac) {
+ // Symbols and Pictographs Extended-A: U+1FABF to U+1FAC5.
+ if (sequence >= 0xf09faabf && sequence <= 0xf09fab85) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FAB0 to U+1FABA.
- if (sequence >= 0xf09faab0 && sequence <= 0xf09faaba) {
+ // Symbols and Pictographs Extended-A: U+1FACE to U+1FADB.
+ if (sequence >= 0xf09fab8e && sequence <= 0xf09fab9b) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FAC0 to U+1FAC5.
- if (sequence >= 0xf09fab80 && sequence <= 0xf09fab85) {
+ // Symbols and Pictographs Extended-A: U+1FAE0 to U+1FAE8.
+ if (sequence >= 0xf09faba0 && sequence <= 0xf09faba8) {
return F_true;
}
- // Symbols and Pictographs Extended-A: U+1FAD0 to U+1FAD9.
- if (sequence >= 0xf09fab90 && sequence <= 0xf09fab99) {
- return F_true;
- }
-
- // Symbols and Pictographs Extended-A: U+1FAE0 to U+1FAE7.
- if (sequence >= 0xf09faba0 && sequence <= 0xf09faba7) {
- return F_true;
- }
-
- // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF6.
- if (sequence >= 0xf09fabb0 && sequence <= 0xf09fabb6) {
+ // Symbols and Pictographs Extended-A: U+1FAF0 to U+1FAF8.
+ if (sequence >= 0xf09fabb0 && sequence <= 0xf09fabb8) {
return F_true;
}
}
return F_true;
}
+ // Kawi: U+11F50 to U+11F59.
+ if (sequence >= 0xf091bd90 && sequence <= 0xf091bd99) {
+ return F_true;
+ }
+
// Tamil Supplement: U+11FC0 to U+11FD4.
if (sequence >= 0xf091bf80 && sequence <= 0xf091bf94) {
return F_true;
}
else if (macro_f_utf_char_t_to_char_2(sequence) == 0x9d) {
+ // Kaktovik Numerals: U+1D2C0 to U+1D2D3.
+ if (sequence >= 0xf09d8b80 && sequence <= 0xf09d8b93) {
+ return F_true;
+ }
+
// Mayan Numerals: U+1D2E0 to U+1D2F3.
if (sequence >= 0xf09d8ba0 && sequence <= 0xf09d8bb3) {
return F_true;
return F_true;
}
+ // Nag Mundari: U+1E4F0 to U+1E4F9.
+ if (sequence >= 0xf09e93b0 && sequence <= 0xf09e93b9) {
+ return F_true;
+ }
+
// Mende Kikakui: U+1E8C7 to U+1E8CF.
if (sequence >= 0xf09ea387 && sequence <= 0xf09ea38f) {
return F_true;
return F_true;
}
+ // Kawi: U+11F43 to U+11F4F.
+ if (sequence >= 0xf091bd83 && sequence <= 0xf091bd8f) {
+ return F_true;
+ }
+
// Tamil Supplement: U+11FFF.
if (sequence == 0xf091bfbf) {
return F_true;
return F_true;
}
- // Alchemical Symbols: U+1F6F0 to U+1F773.
- if (sequence >= 0xf09f9c80 && sequence <= 0xf09f9db3) {
+ // Alchemical Symbols: U+1F700 to U+1F776.
+ if (sequence >= 0xf09f9c80 && sequence <= 0xf09f9db6) {
return F_true;
}
- // Geometric Shapes Extended: U+1F780 to U+1F7D8.
- if (sequence >= 0xf09f9e80 && sequence <= 0xf09f9f98) {
+ // Alchemical Symbols: U+1F77B to U+1F77F.
+ if (sequence >= 0xf09f9dbb && sequence <= 0xf09f9dbf) {
+ return F_true;
+ }
+
+ // Geometric Shapes Extended: U+1F780 to U+1F7D9.
+ if (sequence >= 0xf09f9e80 && sequence <= 0xf09f9f99) {
return F_true;
}
return F_false;
}
- // CJK Unified Ideographs Extension C .. CJK Unified Ideographs Extension E: U+2A700 to U+2CEA1.
- if (sequence >= 0xf0aa9c80 && sequence <= 0xf0acbaa1) {
- return F_true;
- }
-
- // CJK Compatibility Ideographs Supplement: U+2F800 to U+2FA1D.
- if (sequence >= 0xf0afa080 && sequence <= 0xf0afa89d) {
+ // CJK Unified Ideographs Extension B .. CJK Unified Ideographs Extension H: U+20000 to U+323AF.
+ if (sequence >= 0xf0a08080 && sequence <= 0xf0b28eaf) {
return F_true;
}
return F_true;
}
- // Kannada: U+0CF3 to U+0CFF.
- if (character >= 0xe0b3b300 && character <= 0xe0b3bf00) {
+ // Kannada: U+0CF4 to U+0CFF.
+ if (character >= 0xe0b3b400 && character <= 0xe0b3bf00) {
return F_true;
}
return F_true;
}
- // Lao: U+0ECE, U+0ECF, U+0EDA, U+0EDB.
- if (character == 0xe0bb8e00 || character == 0xe0bb8f00 || character == 0xe0bb9a00 || character == 0xe0bb9b00) {
+ // Lao: U+0ECF, U+0EDA, U+0EDB.
+ if (character == 0xe0bb8f00 || character == 0xe0bb9a00 || character == 0xe0bb9b00) {
return F_true;
}
return F_true;
}
+ // Arabic Extended-C: U+10EC0 to U+10EFC.
+ if (character >= 0xf090bb80 && character <= 0xf090bbbc) {
+ return F_true;
+ }
+
// Brahmi: U+1104E to U+11051.
if (character >= 0xf091818e && character <= 0xf0918191) {
return F_true;
return F_true;
}
+ // Devanagari Extended A: U+11B0A to U+11B5F.
+ if (character >= 0xf091ac8a && character <= 0xf091ad9f) {
+ return F_true;
+ }
+
+ // Kawi: U+11F11.
+ if (character == 0xf091bc91) {
+ return F_true;
+ }
+
+ // Kawi: U+11F3B to U+11F3D.
+ if (character >= 0xf091bcbb && character <= 0xf091bcbd) {
+ return F_true;
+ }
+
+ // Kawi: U+11F5A to U+11F5F.
+ if (character >= 0xf091bd9a && character <= 0xf091bd9f) {
+ return F_true;
+ }
+
// Supplemental Symbols and Pictographs: U+1F9C1 to U+1FFFD.
if (character >= 0xf09fa781 && character <= 0xf09fbfbd) {
return F_true;
return F_true;
}
- // Khojki: U+1123E to U+1124F.
- if (character >= 0xf09188be && character <= 0xf091898f) {
+ // Khojki: U+11242 to U+1124F.
+ if (character >= 0xf0918982 && character <= 0xf091898f) {
return F_true;
}
return F_true;
}
- // Egyptian Hieroglyphs: U+1342F.
- if (character == 0xf09390af) {
+ // Egyptian Hieroglyph Format Controls: U+13456 to U+1345F.
+ if (character >= 0xf0939196 && character <= 0xf093919f) {
return F_true;
}
return F_true;
}
+ // Small Kana Extension: U+1B130, U+1B131.
+ if (character == 0xf09b84b0 || character == 0xf09b84b1) {
+ return F_true;
+ }
+
+ // Small Kana Extension: U+1B133 to U+1B14F.
+ if (character >= 0xf09b84b3 && character <= 0xf09b858f) {
+ return F_true;
+ }
+
+ // Small Kana Extension: U+1B153, U+1B154.
+ if (character == 0xf09b8593 || character == 0xf09b8594) {
+ return F_true;
+ }
+
+ // Small Kana Extension: U+1B156 to U+1B163.
+ if (character >= 0xf09b8596 && character <= 0xf09b85a3) {
+ return F_true;
+ }
+
+ // Small Kana Extension: U+1B168 to U+1B16F.
+ if (character >= 0xf09b85a8 && character <= 0xf09b85af) {
+ return F_true;
+ }
+
// Duployan: U+1BC6B to U+1BC6F.
if (character >= 0xf09bb1ab && character <= 0xf09bb1af) {
return F_true;
return F_true;
}
+ // Kaktovik Numerals: U+1D2D4 to U+1D2DF.
+ if (character >= 0xf09d8b94 && character <= 0xf09d8b9f) {
+ return F_true;
+ }
+
// Counting Rod Numerals: U+1D372 to U+1D37F.
if (character >= 0xf09d8db2 && character <= 0xf09d8dbf) {
return F_true;
return F_true;
}
+ // Latin Extended-G: U+1DF1F to U+1DF24.
+ if (character >= 0xf09dbc9f && character <= 0xf09dbca4) {
+ return F_true;
+ }
+
+ // Latin Extended-G: U+1DF2B to U+1DFFF.
+ if (character >= 0xf09dbcab && character <= 0xf09dbfbf) {
+ return F_true;
+ }
+
+ // Cyrillic Extended-D: U+1E06E to U+1E08E.
+ if (character >= 0xf09e81ae && character <= 0xf09e828e) {
+ return F_true;
+ }
+
+ // Nag Mundari: U+1E4FA to U+1E4FF.
+ if (character >= 0xf09e93ba && character <= 0xf09e93bf) {
+ return F_true;
+ }
+
// Mende Kikakui: U+1E8C5 to U+1E8C6.
if (character >= 0xf09ea385 && character <= 0xf09ea386) {
return F_true;
return F_true;
}
- // Transport and Map Symbols: U+1F6D1 to U+1F6DF.
- if (character >= 0xf09f9b91 && character <= 0xf09f9b9f) {
+ // Transport and Map Symbols: U+1F6D8 to U+1F6DB.
+ if (character >= 0xf09f9b98 && character <= 0xf09f9b9b) {
return F_true;
}
return F_true;
}
- // Transport and Map Symbols: U+1F6F4 to U+1F6FF.
- if (character >= 0xf09f9bb4 && character <= 0xf09f9bbf) {
+ // Transport and Map Symbols: U+1F6FD to U+1F6FF.
+ if (character >= 0xf09f9bbd && character <= 0xf09f9bbf) {
+ return F_true;
+ }
+
+ // Alchemical Symbols: U+1F777 to U+1F77A.
+ if (character >= 0xf09f9db7 && character <= 0xf09f9dba) {
+ return F_true;
+ }
+
+ // Geometric Shapes Extended: U+1F7DA to U+1F7DF.
+ if (character >= 0xf09f9f9a && character <= 0xf09f9f9f) {
return F_true;
}
- // Alchemical Symbols: U+1F774 to U+1F77F.
- if (character >= 0xf09f9db4 && character <= 0xf09f9dbf) {
+ // Geometric Shapes Extended: U+1F7EC to U+1F7EF.
+ if (character >= 0xf09f9fac && character <= 0xf09f9faf) {
return F_true;
}
- // Geometric Shapes Extended: U+1F7D5 to U+1F7FF.
- if (character >= 0xf09f9f95 && character <= 0xf09f9fbf) {
+ // Geometric Shapes Extended: U+1F7F1 to U+1F7FF.
+ if (character >= 0xf09f9fb1 && character <= 0xf09f9fbf) {
return F_true;
}
return F_true;
}
+ // Symbols and Pictographs Extended-A: U+1FA7D to U+1FA7F.
+ if (character >= 0xf09fa9bd && character <= 0xf09fa9bf) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FA89 to U+1FA8F.
+ if (character >= 0xf09faa89 && character <= 0xf09faa8f) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FABE.
+ if (character == 0xf09faabe) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FAC6 to U+1FACD.
+ if (character >= 0xf09fab86 && character <= 0xf09fab8d) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FADC to U+1FADF.
+ if (character >= 0xf09fab9c && character <= 0xf09fab9f) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FAE9 to U+1FAEF.
+ if (character >= 0xf09faba9 && character <= 0xf09fabaf) {
+ return F_true;
+ }
+
+ // Symbols and Pictographs Extended-A: U+1FAF9 to U+1FAFF.
+ if (character >= 0xf09fabb9 && character <= 0xf09fabbf) {
+ return F_true;
+ }
+
// CJK Unified Ideographs Extension B: U+2A6D7 to U+2A6FF.
if (character >= 0xf0aa9b97 && character <= 0xf0aa9bbf) {
return F_true;
return F_true;
}
+ // CJK Unified Ideographs Extension E: U+2CEA2 to U+2CEAF.
+ // Extension E's last assigned code point is U+2CEA1 and Extension F begins at U+2CEB0, so only this gap is unassigned.
+ if (character >= 0xf0acbaa2 && character <= 0xf0acbaaf) {
+ return F_true;
+ }
+
// CJK Compatibility Ideographs Supplement: U+2FA1E to U+2FFFD.
// The lower bound is the UTF-8 encoding of U+2FA1E (F0 AF A8 9E); a lower bound of 0xf0b08080 (U+30000) exceeds the upper bound and can never match.
if (character >= 0xf0afa89e && character <= 0xf0afbfbd) {
return F_true;
}
- // Tertiary Ideographic Plane: U+30000 to U+3FFFD.
- if (character >= 0xf0b08080 && character <= 0xf0bfbfbd) {
+ // CJK Unified Ideographs Extension G: U+3134B to U+3134F.
+ if (character >= 0xf0b18d8b && character <= 0xf0b18d8f) {
+ return F_true;
+ }
+
+ // Tertiary Ideographic Plane: U+323B0 to U+3FFFD.
+ // U+30000 to U+3134A (Extension G) and U+31350 to U+323AF (Extension H) are assigned and must not match here.
+ // The unassigned tail of Extension G (U+3134B to U+3134F) is checked separately.
+ if (character >= 0xf0b28eb0 && character <= 0xf0bfbfbd) {
return F_true;
}
14726029
14726050
14726051
+14726067
14726273
14726529
14726530
14727865
14727867
14727868
+14728078
14728344
14728345
14728373
4036012223
4036012965
4036012966
+4036017085
+4036017086
+4036017087
4036067457
4036067512
4036067513
4036069258
4036069259
4036069260
+4036082816
+4036082817
+4036082818
+4036082819
+4036082868
+4036082869
+4036082870
+4036082871
+4036082872
+4036082873
+4036082874
+4036082878
+4036082879
+4036083072
+4036083073
+4036083074
+4036919951
+4036924332
+4036924333
+4036924334
+4036924335
4036069551
4036069552
4036069553
49821
49822
49823
+49837
+55424
+55425
+55426
+55427
+55428
+55429
+55452
+56221
+56463
+14721954
+14786702
+14844043
+14844044
+14844045
+14844046
+14844047
+14844074
+14844075
+14844076
+14844077
+14844078
+14844320
+14844321
+14844322
+14844323
+14844324
+14844326
+14844327
+14844328
+14844329
+14844330
+14844331
+14844332
+14844333
+14844334
+14844335
+15711167
+15712185
+15712186
+15712187
+4036068029
+4036068237
+4036202672
+4036202673
+4036202674
+4036202675
+4036202676
+4036202677
+4036202678
+4036202679
+4036202680
+4036202681
+4036202682
+4036202683
+4036202684
+4036202685
+4036202686
+4036202687
+4036202880
+4036202881
+4036202882
+4036202883
+4036202884
+4036202885
+4036202886
+4036202887
+4036202888
+4036202889
+4036202890
+4036202891
+4036202892
+4036202893
+4036202894
+4036202895
+4036202896
+4036202897
+4036202898
+4036202899
+4036202900
+4036202901
14785454
14785455
14785456
-14845344
-14845345
-14845346
-14845347
-14845348
-14845349
-14845350
-14845351
-14845352
-14845353
-14845354
-14845355
-14845356
-14845357
-14845358
-14845359
-14845360
-14845361
-14845362
-14845363
-14845364
-14845365
-14845366
-14845367
-14845368
-14845369
-14845370
-14845371
-14845372
-14845373
-14845374
-14845375
-14845568
-14845569
-14845570
-14845573
-14845574
-14845575
-14845576
14909575
14909601
14909602
4036137388
4036137389
4036137390
+4036083088
+4036083089
+4036083090
+4036083091
+4036083092
+4036083093
+4036083094
+4036083095
+4036083096
+4036083097
+4036856704
+4036856705
+4036856706
+4036856707
+4036856708
+4036856709
+4036856710
+4036856711
+4036856712
+4036856713
+4036856714
+4036856715
+4036856716
+4036856717
+4036856718
+4036856719
+4036856720
+4036856721
+4036856722
+4036856723
+4036924336
+4036924337
+4036924338
+4036924339
+4036924340
+4036924341
+4036924342
+4036924343
+4036924344
+4036924345
4036991893
4036991894
4036991895
+4036991900
4036991901
4036991902
4036991903
4036995506
4036995507
4036995508
+4036995509
+4036995510
+4036995511
4036995512
4036995513
4036995514
4036995716
4036995717
4036995718
+4036995719
+4036995720
4036995728
4036995729
4036995730
4036995754
4036995755
4036995756
+4036995757
+4036995758
+4036995759
4036995760
4036995761
4036995762
4036995768
4036995769
4036995770
+4036995771
+4036995772
+4036995773
+4036995775
4036995968
4036995969
4036995970
4036995971
4036995972
4036995973
+4036995982
+4036995983
4036995984
4036995985
4036995986
4036995991
4036995992
4036995993
+4036995994
+4036995995
4036996000
4036996001
4036996002
4036996020
4036996021
4036996022
+4036996023
+4036996024
14844297
14845090
14845113
U+0CCD
U+0CE2
U+0CE3
+U+0CF3
U+0D01
U+0D41
U+0D42
U+0EB9
U+0EBB
U+0EBC
+U+0ECE
U+0F18
U+0F19
U+0F35
U+10A3F
U+10AE5
U+10AE6
+U+10EFD
+U+10EFE
+U+10EFF
U+11001
U+11038
U+11039
U+111CA
U+111CB
U+111CC
+U+11F00
+U+11F01
+U+11F02
+U+11F03
+U+11F34
+U+11F35
+U+11F36
+U+11F37
+U+11F38
+U+11F39
+U+11F3A
+U+11F3E
+U+11F3F
+U+11F40
+U+11F41
+U+11F42
+U+1E08F
+U+1E4EC
+U+1E4ED
+U+1E4EE
+U+1E4EF
U+1122F
U+11230
U+11231
U+009D
U+009E
U+009F
+U+00AD
+U+0600
+U+0601
+U+0602
+U+0603
+U+0604
+U+0605
+U+061C
+U+06DD
+U+070F
+U+08E2
+U+180E
+U+200B
+U+200C
+U+200D
+U+200E
+U+200F
+U+202A
+U+202B
+U+202C
+U+202D
+U+202E
+U+2060
+U+2061
+U+2062
+U+2063
+U+2064
+U+2066
+U+2067
+U+2068
+U+2069
+U+206A
+U+206B
+U+206C
+U+206D
+U+206E
+U+206F
+U+FEFF
+U+FFF9
+U+FFFA
+U+FFFB
+U+110BD
+U+110CD
+U+13430
+U+13431
+U+13432
+U+13433
+U+13434
+U+13435
+U+13436
+U+13437
+U+13438
+U+13439
+U+1343A
+U+1343B
+U+1343C
+U+1343D
+U+1343E
+U+1343F
+U+13440
+U+13441
+U+13442
+U+13443
+U+13444
+U+13445
+U+13446
+U+13447
+U+13448
+U+13449
+U+1344A
+U+1344B
+U+1344C
+U+1344D
+U+1344E
+U+1344F
+U+13450
+U+13451
+U+13452
+U+13453
+U+13454
+U+13455
U+16EE
U+16EF
U+16F0
-U+2160
-U+2161
-U+2162
-U+2163
-U+2164
-U+2165
-U+2166
-U+2167
-U+2168
-U+2169
-U+216A
-U+216B
-U+216C
-U+216D
-U+216E
-U+216F
-U+2170
-U+2171
-U+2172
-U+2173
-U+2174
-U+2175
-U+2176
-U+2177
-U+2178
-U+2179
-U+217A
-U+217B
-U+217C
-U+217D
-U+217E
-U+217F
-U+2180
-U+2181
-U+2182
-U+2185
-U+2186
-U+2187
-U+2188
U+3007
U+3021
U+3022
U+1246C
U+1246D
U+1246E
+U+11F50
+U+11F51
+U+11F52
+U+11F53
+U+11F54
+U+11F55
+U+11F56
+U+11F57
+U+11F58
+U+11F59
+U+1D2C0
+U+1D2C1
+U+1D2C2
+U+1D2C3
+U+1D2C4
+U+1D2C5
+U+1D2C6
+U+1D2C7
+U+1D2C8
+U+1D2C9
+U+1D2CA
+U+1D2CB
+U+1D2CC
+U+1D2CD
+U+1D2CE
+U+1D2CF
+U+1D2D0
+U+1D2D1
+U+1D2D2
+U+1D2D3
+U+1E4F0
+U+1E4F1
+U+1E4F2
+U+1E4F3
+U+1E4F4
+U+1E4F5
+U+1E4F6
+U+1E4F7
+U+1E4F8
+U+1E4F9
U+1F6D5
U+1F6D6
U+1F6D7
+U+1F6DC
U+1F6DD
U+1F6DE
U+1F6DF
U+1FA72
U+1FA73
U+1FA74
+U+1FA75
+U+1FA76
+U+1FA77
U+1FA78
U+1FA79
U+1FA7A
U+1FA84
U+1FA85
U+1FA86
+U+1FA87
+U+1FA88
U+1FA90
U+1FA91
U+1FA92
U+1FAAA
U+1FAAB
U+1FAAC
+U+1FAAD
+U+1FAAE
+U+1FAAF
U+1FAB0
U+1FAB1
U+1FAB2
U+1FAB8
U+1FAB9
U+1FABA
+U+1FABB
+U+1FABC
+U+1FABD
+U+1FABF
U+1FAC0
U+1FAC1
U+1FAC2
U+1FAC3
U+1FAC4
U+1FAC5
+U+1FACE
+U+1FACF
U+1FAD0
U+1FAD1
U+1FAD2
U+1FAF4
U+1FAF5
U+1FAF6
+U+1FAF7
+U+1FAF8
U+2049
U+2122
U+2139
17
18
19
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-11
-12
-50
-100
-500
-1000
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-11
-12
-50
-100
-500
-1000
-1000
-5000
-10000
-6
-50
-50000
-100000
0
1
2
7
8
9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
* API Version: 0.7
* Licenses: lgpl-2.1-or-later
*
- * This is intendend to support Unicode 14.0.
+ * This is intended to support Unicode 15.0.
*
* When using "text" mode, this program attempts to translate UTF-8 sequences such that certain codes don't cause printing problems.
* There may be cases where there are unknown codes that get printed and the invalid UTF-8 marker may be displayed not by this program but instead by the shell or some other program.
* API Version: 0.7
* Licenses: lgpl-2.1-or-later
*
- * This is intendend to support Unicode 14.0.
+ * This is intended to support Unicode 15.0.
*
* This is a program for handling basic UTF-8 related conversions.
* - Convert from UTF-8 character to bytesequence.