f_array_length_t i1 = offset1;
f_array_length_t i2 = offset2;
-
+ f_array_length_t previous = 0;
f_status_t status = F_none;
// Skip past leading whitespace in string1.
- for (; i1 < stop1; ++i1) {
+ for (; i1 < stop1; i1 += macro_f_utf_byte_width(string1[i1])) {
// Skip past NULL in string1.
while (i1 < stop1 && !string1[i1]) ++i1;
status = f_utf_character_is_whitespace(string1[i1], F_false);
if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
- // Ignore possibly invalid UTF-8 codes.
- if (F_status_set_fine(status) != F_maybe) {
- return status;
- }
+ break;
}
if (status == F_false) break;
+
+ status = f_utf_character_is_combining(string1[i1]);
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
+
+ break;
+ }
+
+ // This is a combining character, so the previous character is no longer considered a space.
+ if (status == F_true) {
+ i1 = previous;
+
+ break;
+ }
+
+ previous = i1;
} // for
// Skip past leading whitespace in string2.
- for (; i2 < stop2; i2++) {
+ for (; i2 < stop2; i2 += macro_f_utf_byte_width(string2[i2])) {
// Skip past NULL in string2.
while (i2 < stop2 && !string2[i2]) ++i2;
status = f_utf_character_is_whitespace(string2[i2], F_false);
if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
- // Ignore possibly invalid UTF-8 codes.
- if (F_status_set_fine(status) != F_maybe) {
- return status;
- }
+ break;
}
if (status == F_false) break;
+
+ status = f_utf_character_is_combining(string2[i2]);
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
+
+ break;
+ }
+
+ // This is a combining character, so the previous character is no longer considered a space.
+ if (status == F_true) {
+ i2 = previous;
+
+ break;
+ }
+
+ previous = i2;
} // for
f_array_length_t last1 = i1;
f_array_length_t last2 = i2;
{
- // The size1 and size2 are to represent to total number of characters after trim.
+ // Size1 and size2 represent the total number of characters after trim.
f_array_length_t size1 = 0;
f_array_length_t size2 = 0;
- f_array_length_t j = 0;
+
+ previous = i1;
// Determine where the last non-whitespace is in string1.
- for (j = i1; j < stop1; ++j) {
+ for (f_array_length_t j = i1; j < stop1; j += macro_f_utf_byte_width(string1[j])) {
// Skip past NULL in string1.
while (j < stop1 && !string1[j]) ++j;
status = f_utf_character_is_whitespace(string1[j], F_false);
if (F_status_is_error(status)) {
- // ignore possibly invalid UTF-8 codes.
- if (F_status_set_fine(status) != F_maybe) {
- return status;
- }
+ if (F_status_set_fine(status) == F_parameter) return status;
+
+ break;
}
if (status == F_false) {
+ status = f_utf_character_is_combining(string1[j]);
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
+ }
+
+ // This is a combining character, so the previous character is no longer considered a space.
+ if (status == F_true) {
+ if (last1 != previous) {
+ size1 -= macro_f_utf_byte_width(string1[last1]);
+ last1 = previous;
+ }
+ }
+ else {
+ last1 = j;
+ size1 += macro_f_utf_byte_width(string1[last1]);
+ previous = j;
+ }
+ }
+ else if (F_status_is_error(status)) {
last1 = j;
- ++size1;
+ size1 += macro_f_utf_byte_width(string1[last1]);
+ previous = j;
+ }
+ else {
+ previous = j;
}
} // for
+ previous = i2;
+
// Determine where the last non-whitespace is in string2.
- for (j = i2; j < stop2; ++j) {
+ for (f_array_length_t j = i2; j < stop2; j += macro_f_utf_byte_width(string2[j])) {
// Skip past NULL in string2.
while (j < stop2 && !string2[j]) ++j;
status = f_utf_character_is_whitespace(string2[j], F_false);
if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
- // Ignore possibly invalid UTF-8 codes.
- if (F_status_set_fine(status) != F_maybe) {
- return status;
- }
+ break;
}
if (status == F_false) {
+ status = f_utf_character_is_combining(string2[j]);
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_parameter) return status;
+ }
+
+ // This is a combining character, so the previous character is no longer considered a space.
+ if (status == F_true) {
+ if (last2 != previous) {
+ size2 -= macro_f_utf_byte_width(string2[last2]);
+ last2 = previous;
+ }
+ }
+ else {
+ last2 = j;
+ size2 += macro_f_utf_byte_width(string2[last2]);
+ previous = j;
+ }
+ }
+ else if (F_status_is_error(status)) {
last2 = j;
- ++size2;
+ size2 += macro_f_utf_byte_width(string2[last2]);
+ previous = j;
+ }
+ else {
+ previous = j;
}
} // for
- if (size1 != size2) {
- return F_equal_to_not;
- }
+ if (size1 != size2) return F_equal_to_not;
}
- for (; i1 < last1 && i2 < last2; ++i1, ++i2) {
+ if (last1 < stop1 && last2 < stop2) {
+ for (; i1 < last1 && i2 < last2; ++i1, ++i2) {
- // Skip past NULL in string1.
- while (i1 < last1 && !string1[i1]) ++i1;
- if (i1 == last1) break;
+ // Skip past NULL in string1.
+ while (i1 < last1 && !string1[i1]) ++i1;
+ if (i1 == last1) break;
- // Skip past NULL in string2.
- while (i2 < last2 && !string2[i2]) ++i2;
- if (i2 == last2) break;
+ // Skip past NULL in string2.
+ while (i2 < last2 && !string2[i2]) ++i2;
+ if (i2 == last2) break;
- if (string1[i1] != string2[i2]) return F_equal_to_not;
- } // for
+ if (string1[i1] != string2[i2]) return F_equal_to_not;
+ } // for
+ }
// Only return F_equal_to if all remaining characters are NULL.
- for (; i1 < last1; ++i1) {
- if (string1[i1] != 0) return F_equal_to_not;
- } // for
+ if (last1 < stop1) {
+ for (; i1 < last1; ++i1) {
+ if (string1[i1] != 0) return F_equal_to_not;
+ } // for
+ }
- for (; i2 < last2; ++i2) {
- if (string2[i2] != 0) return F_equal_to_not;
- } // for
+ if (last2 < stop2) {
+ for (; i2 < last2; ++i2) {
+ if (string2[i2] != 0) return F_equal_to_not;
+ } // for
+ }
return F_equal_to;
}
#ifndef _di_fl_utf_string_dynamic_partial_compare_
// Compare the range1 section of string1 against the range2 section of string2.
//
// This is a thin wrapper: all of the work is delegated to private_fl_utf_string_compare() and that call's status is returned unchanged.
// Each range's start is passed through as the starting offset and each range's stop is passed as stop + 1
// (presumably because range stops are inclusive while the private function takes an exclusive stop; confirm against its definition).
//
// NOTE(review): the lines marked "-" below remove the _di_level_1_parameter_checking_ guard that returned F_parameter (as an error)
// when a range stop was at or beyond string.used; nothing visible in this function re-validates the ranges afterwards, so confirm the callee or callers do.
f_status_t fl_utf_string_dynamic_partial_compare(const f_utf_string_static_t string1, const f_utf_string_static_t string2, const f_string_range_t range1, const f_string_range_t range2) {
- #ifndef _di_level_1_parameter_checking_
- if (string1.used <= range1.stop) return F_status_set_error(F_parameter);
- if (string2.used <= range2.stop) return F_status_set_error(F_parameter);
- #endif // _di_level_1_parameter_checking_
// Hand the raw buffers, the range starts, and the (stop + 1) end positions to the shared private comparison.
return private_fl_utf_string_compare(string1.string, string2.string, range1.start, range2.start, range1.stop + 1, range2.stop + 1);
}
#ifndef _di_fl_utf_string_dynamic_partial_compare_trim_
// Compare the range1 section of string1 against the range2 section of string2 using the trimming variant of the comparison.
//
// This is a thin wrapper: all of the work is delegated to private_fl_utf_string_compare_trim() and that call's status is returned unchanged.
// The callee's name suggests surrounding whitespace is ignored during the comparison; its definition is not fully visible here, so verify there.
// Each range's start is passed through as the starting offset and each range's stop is passed as stop + 1
// (presumably because range stops are inclusive while the private function takes an exclusive stop; confirm against its definition).
//
// NOTE(review): the lines marked "-" below remove the _di_level_1_parameter_checking_ guard that returned F_parameter (as an error)
// when a range stop was at or beyond string.used; nothing visible in this function re-validates the ranges afterwards, so confirm the callee or callers do.
f_status_t fl_utf_string_dynamic_partial_compare_trim(const f_utf_string_static_t string1, const f_utf_string_static_t string2, const f_string_range_t range1, const f_string_range_t range2) {
- #ifndef _di_level_1_parameter_checking_
- if (string1.used <= range1.stop) return F_status_set_error(F_parameter);
- if (string2.used <= range2.stop) return F_status_set_error(F_parameter);
- #endif // _di_level_1_parameter_checking_
// Hand the raw buffers, the range starts, and the (stop + 1) end positions to the shared private trimming comparison.
return private_fl_utf_string_compare_trim(string1.string, string2.string, range1.start, range2.start, range1.stop + 1, range2.stop + 1);
}