I did some research and learned that the "is*()" functions can significantly affect performance due to locale handling and other factors.
I originally used these functions so that well-established optimizations could take place.
Replace these with simple arithmetic range checks (for example, "(uint16_t) (character - '0') < 10") that should increase performance.
This also avoids the overhead of placing a function call on the stack.
This project is already function-call heavy by design, so reducing function calls where it is easy to do so is worthwhile.
Start using character literals rather than the standard string constants in the UTF-related functions.
In these cases the ASCII expectation is guaranteed.
The ability to override these is also not practical as the meaning should not change.
I have not looked at all of the "is*()" functions and I may address any remaining ones at a later time.
I may also investigate mapping tables to further improve performance.
These math calculations can be used in a lot of the non-ASCII UTF ranges as well.
I opted to not do these just yet given that such work will take a large amount of time.
I have not done any performance analysis yet but I plan to do so.
#if !defined(_di_f_utf_character_is_alphabetic_digit_) || !defined(_di_f_utf_character_is_digit_) || !defined(_di_f_utf_character_is_word_) || !defined(_di_f_utf_character_is_word_dash_) || !defined(_di_f_utf_character_is_word_dash_plus_) || !defined(_di_f_utf_is_alphabetic_digit_) || !defined(_di_f_utf_is_digit_) || !defined(_di_f_utf_is_word_) || !defined(_di_f_utf_is_word_dash_) || !defined(_di_f_utf_is_word_dash_plus_)
f_status_t private_f_utf_character_is_digit_for_ascii(const f_char_t character, uint64_t * const value) {
- if (isdigit(character)) {
+ uint16_t result = (uint16_t) (character - '0');
+
+ if (result < 10) {
if (value) {
- if (character == f_string_ascii_0_s.string[0]) {
- *value = 0;
- }
- else if (character == f_string_ascii_1_s.string[0]) {
- *value = 1;
- }
- else if (character == f_string_ascii_2_s.string[0]) {
- *value = 2;
- }
- else if (character == f_string_ascii_3_s.string[0]) {
- *value = 3;
- }
- else if (character == f_string_ascii_4_s.string[0]) {
- *value = 4;
- }
- else if (character == f_string_ascii_5_s.string[0]) {
- *value = 5;
- }
- else if (character == f_string_ascii_6_s.string[0]) {
- *value = 6;
- }
- else if (character == f_string_ascii_7_s.string[0]) {
- *value = 7;
- }
- else if (character == f_string_ascii_8_s.string[0]) {
- *value = 8;
- }
- else if (character == f_string_ascii_9_s.string[0]) {
- *value = 9;
- }
- else if (character == f_string_ascii_a_s.string[0] || character == f_string_ascii_A_s.string[0]) {
- *value = 10;
- }
- else if (character == f_string_ascii_b_s.string[0] || character == f_string_ascii_B_s.string[0]) {
- *value = 11;
- }
- else if (character == f_string_ascii_c_s.string[0] || character == f_string_ascii_C_s.string[0]) {
- *value = 12;
- }
- else if (character == f_string_ascii_d_s.string[0] || character == f_string_ascii_D_s.string[0]) {
- *value = 13;
- }
- else if (character == f_string_ascii_e_s.string[0] || character == f_string_ascii_E_s.string[0]) {
- *value = 14;
- }
- else if (character == f_string_ascii_f_s.string[0] || character == f_string_ascii_F_s.string[0]) {
- *value = 15;
- }
- else {
- *value = F_type_size_max_64_unsigned_d;
- }
+ *value = (uint64_t) result;
+ }
+
+ return F_true;
+ }
+
+ result = (uint16_t) (character - 'A');
+
+ if (result > 5) {
+ result = (uint16_t) (character - 'a');
+ }
+
+ if (result < 6) {
+ if (value) {
+ *value = (uint64_t) (10 + result);
}
return F_true;
* F_true if a UTF-8 decimal character.
* F_false if not a UTF-8 decimal character.
*
- * @see isdigit()
- *
* @see f_utf_character_is_alphabetic_digit()
* @see f_utf_character_is_digit()
* @see f_utf_character_is_word()
i = length;
}
else {
- if (macro_f_utf_char_t_to_char_1(string[i]) == f_string_ascii_u_s.string[0] || macro_f_utf_char_t_to_char_1(string[i]) == f_string_ascii_U_s.string[0]) {
+ if (macro_f_utf_char_t_to_char_1(string[i]) == 'u' || macro_f_utf_char_t_to_char_1(string[i]) == 'U') {
do {
++i;
} while (i < length && !string[i]);
- if (i < length && !macro_f_utf_char_t_width_is(string[i]) && macro_f_utf_char_t_to_char_1(string[i]) == f_string_ascii_plus_s.string[0]) {
+ if (i < length && !macro_f_utf_char_t_width_is(string[i]) && macro_f_utf_char_t_to_char_1(string[i]) == '+') {
++i;
}
else {
} // while
if (i < length) {
- if (string[i] == f_string_ascii_u_s.string[0] || string[i] == f_string_ascii_U_s.string[0]) {
+ if (string[i] == 'u' || string[i] == 'U') {
do {
++i;
} while (i < length && !string[i]);
- if (i < length && string[i] == f_string_ascii_plus_s.string[0]) {
+ if (i < length && string[i] == '+') {
++i;
}
else {
return private_f_utf_character_is_alphabetic(utf);
}
- if (isalpha(*sequence)) return F_true;
-
- return F_false;
+ return (uint16_t) ((*sequence) - 'a') < 26 || (uint16_t) ((*sequence) - 'A') < 26
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_alphabetic_
return private_f_utf_character_is_alphabetic_digit(utf, value);
}
- if (isalpha(*sequence)) return F_true;
+ if (private_f_utf_character_is_digit_for_ascii(*sequence, value)) return F_true;
- return private_f_utf_character_is_digit_for_ascii(*sequence, value);
+ return (uint16_t) (*sequence - 'a') < 26 || (uint16_t) (*sequence - 'A') < 26
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_alphabetic_digit_
return private_f_utf_character_is_alphabetic_numeric(utf);
}
- if (isalnum(*sequence)) return F_true;
-
- return F_false;
+ return (uint16_t) (*sequence - 'a') < 26 || (uint16_t) (*sequence - 'A') < 26 || (uint16_t) (*sequence - '0') < 10
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_alphabetic_numeric_
return private_f_utf_character_is_numeric(utf);
}
- if (isdigit(*sequence)) return F_true;
-
- return F_false;
+ return (uint16_t) (*sequence - '0') < 10 ? F_true : F_false;
}
#endif // _di_f_utf_is_numeric_
return private_f_utf_character_is_word(utf, strict);
}
- if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (*sequence - 'a') < 26 || (uint16_t) (*sequence - 'A') < 26 || (uint16_t) (*sequence - '0') < 10 || *sequence == '_'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_word_
return private_f_utf_character_is_word_dash(utf, strict);
}
- if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (*sequence - 'a') < 26 || (uint16_t) (*sequence - 'A') < 26 || (uint16_t) (*sequence - '0') < 10 || *sequence == '_' || *sequence == '-'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_word_dash_
return private_f_utf_character_is_word_dash_plus(utf, strict);
}
- if (isalnum(*sequence) || *sequence == f_string_ascii_underscore_s.string[0] || *sequence == f_string_ascii_minus_s.string[0] || *sequence == f_string_ascii_plus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (*sequence - 'a') < 26 || (uint16_t) (*sequence - 'A') < 26 || (uint16_t) (*sequence - '0') < 10 || *sequence == '_' || *sequence == '-' || *sequence == '+'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_is_word_dash_plus_
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalpha()
*/
#ifndef _di_f_utf_is_alphabetic_
extern f_status_t f_utf_is_alphabetic(const f_string_t sequence, const f_array_length_t width_max);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalpha()
- * @see isdigit()
*/
#ifndef _di_f_utf_is_alphabetic_digit_
extern f_status_t f_utf_is_alphabetic_digit(const f_string_t sequence, const f_array_length_t width_max, uint64_t * const value);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_is_alphabetic_numeric_
extern f_status_t f_utf_is_alphabetic_numeric(const f_string_t sequence, const f_array_length_t width_max);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isdigit()
*/
#ifndef _di_f_utf_is_digit_
extern f_status_t f_utf_is_digit(const f_string_t sequence, const f_array_length_t width_max, uint64_t * const value);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isdigit()
*/
#ifndef _di_f_utf_is_numeric_
extern f_status_t f_utf_is_numeric(const f_string_t sequence, const f_array_length_t width_max);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_is_word_
extern f_status_t f_utf_is_word(const f_string_t sequence, const f_array_length_t width_max, const bool strict);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_is_word_dash_
extern f_status_t f_utf_is_word_dash(const f_string_t sequence, const f_array_length_t width_max, const bool strict);
* F_complete_not_utf (with error bit set) if character is an incomplete UTF-8 sequence.
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if Unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_is_word_dash_plus_
extern f_status_t f_utf_is_word_dash_plus(const f_string_t sequence, const f_array_length_t width_max, const bool strict);
return private_f_utf_character_is_alphabetic(sequence);
}
- if (isalpha(macro_f_utf_char_t_to_char_1(sequence))) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_alphabetic_
return private_f_utf_character_is_alphabetic_digit(sequence, value);
}
- if (isalpha(macro_f_utf_char_t_to_char_1(sequence))) return F_true;
+ if (private_f_utf_character_is_digit_for_ascii(macro_f_utf_char_t_to_char_1(sequence), value)) return F_true;
- return private_f_utf_character_is_digit_for_ascii(macro_f_utf_char_t_to_char_1(sequence), value);
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_alphabetic_digit_
return private_f_utf_character_is_alphabetic_numeric(sequence);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence))) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - '0') < 10
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_alphabetic_numeric_
return private_f_utf_character_is_numeric(sequence);
}
- if (isdigit(macro_f_utf_char_t_to_char_1(sequence))) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - '0') < 10 ? F_true : F_false;
}
#endif // _di_f_utf_character_is_numeric_
return private_f_utf_character_is_word(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - '0') < 10 || macro_f_utf_char_t_to_char_1(sequence) == '_'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_word_
return private_f_utf_character_is_word_dash(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_minus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - '0') < 10 || macro_f_utf_char_t_to_char_1(sequence) == '_' || macro_f_utf_char_t_to_char_1(sequence) == '-'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_word_dash_
return private_f_utf_character_is_word_dash_plus(sequence, strict);
}
- if (isalnum(macro_f_utf_char_t_to_char_1(sequence)) || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_underscore_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_minus_s.string[0] || macro_f_utf_char_t_to_char_1(sequence) == f_string_ascii_plus_s.string[0]) {
- return F_true;
- }
-
- return F_false;
+ return (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'a') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - 'A') < 26 || (uint16_t) (macro_f_utf_char_t_to_char_1(sequence) - '0') < 10 || macro_f_utf_char_t_to_char_1(sequence) == '_' || macro_f_utf_char_t_to_char_1(sequence) == '-' || macro_f_utf_char_t_to_char_1(sequence) == '+'
+ ? F_true
+ : F_false;
}
#endif // _di_f_utf_character_is_word_dash_plus_
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalpha()
*/
#ifndef _di_f_utf_character_is_alphabetic_
extern f_status_t f_utf_character_is_alphabetic(const f_utf_char_t sequence);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalpha()
- * @see isdigit()
*/
#ifndef _di_f_utf_character_is_alphabetic_digit_
extern f_status_t f_utf_character_is_alphabetic_digit(const f_utf_char_t sequence, uint64_t * const value);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_character_is_alphabetic_numeric_
extern f_status_t f_utf_character_is_alphabetic_numeric(const f_utf_char_t sequence);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isdigit()
*/
#ifndef _di_f_utf_character_is_digit_
extern f_status_t f_utf_character_is_digit(const f_utf_char_t sequence, uint64_t * const value);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isdigit()
*/
#ifndef _di_f_utf_character_is_numeric_
extern f_status_t f_utf_character_is_numeric(const f_utf_char_t sequence);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_character_is_word_
extern f_status_t f_utf_character_is_word(const f_utf_char_t sequence, const bool strict);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_character_is_word_dash_
extern f_status_t f_utf_character_is_word_dash(const f_utf_char_t sequence, const bool strict);
*
* F_utf_fragment (with error bit) if character is a UTF-8 fragment.
* F_utf_not (with error bit) if unicode is an invalid Unicode character.
- *
- * @see isalnum()
*/
#ifndef _di_f_utf_character_is_word_dash_plus_
extern f_status_t f_utf_character_is_word_dash_plus(const f_utf_char_t sequence, const bool strict);
return F_found_not;
}
- if (isxdigit(buffer.string[range->start])) {
+ if ((uint16_t) (buffer.string[range->start] - 'a') < 6 || (uint16_t) (buffer.string[range->start] - 'A') < 6 || (uint16_t) (buffer.string[range->start] - '0') < 10) {
number[j] = buffer.string[range->start];
}
else {
* F_parameter (with error bit) from: f_utf_is_whitespace().
* F_parameter (with error bit) from: f_utf_is_word().
*
- * @see isxdigit()
- *
* @see f_utf_is_combining()
* @see f_utf_is_whitespace()
* @see f_utf_is_word()
f_file_read_block(file, &response);
- if (!response.used || response.string[0] == f_string_ascii_1_s.string[0]) {
+ if (!response.used || response.string[0] == '1') {
f_string_dynamic_resize(0, &response);
close(descriptors[0]);
for (uint8_t j = 2; i < source.used && j < 8; ) {
- if (!isdigit(source.string[i])) {
+ if ((uint16_t) (source.string[i] - '0') > 9) {
if (!(source.string[i] == f_string_ascii_A_s.string[0] ||
source.string[i] == f_string_ascii_B_s.string[0] ||
source.string[i] == f_string_ascii_C_s.string[0] ||
if (!name.used) return F_none;
- if (!isalpha(name.string[0]) && name.string[0] != '_') {
- return F_false;
+ if (!((uint16_t) (name.string[0] - 'a') < 26 || (uint16_t) (name.string[0] - 'A') < 26)) {
+ if (name.string[0] != '_') return F_false;
}
for (f_array_length_t i = 1; i < name.used; ++i) {
- if (!isalnum(name.string[i]) && name.string[i] != '_') {
- return F_false;
+ if (!((uint16_t) (name.string[i] - 'a') < 26 || (uint16_t) (name.string[i] - 'A') < 26 || (uint16_t) (name.string[i] - '0') < 10)) {
+ if (name.string[i] != '_') return F_false;
}
} // for
* F_true on valid.
* F_false on invalid.
* F_none if there is no string to validate (used = 0).
- *
- * @see isalpha()
- * @see isalnum()
*/
#ifndef _di_fake_make_operate_validate_define_name_
extern f_status_t fake_make_operate_validate_define_name(const f_string_static_t name) F_attribute_visibility_internal_d;