From 29aeb97ac13e192ec850667f6067c672cd5d8722 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Thu, 23 Apr 2020 22:32:59 -0500 Subject: [PATCH] Update: rename whitespace functions to space functions and consider zero-width Whitespace is now being used more explicitly in regards to UTF-8. Zero-width considerations added, but I think more work is needed in this regard. --- level_1/fl_fss/c/fss.c | 20 +++++++++----------- level_1/fl_fss/c/fss.h | 20 +++++++++++++------- level_1/fl_fss/c/fss_basic.c | 4 ++-- level_1/fl_fss/c/fss_basic_list.c | 2 +- level_1/fl_fss/c/fss_extended.c | 4 ++-- level_1/fl_fss/c/fss_extended_list.c | 2 +- 6 files changed, 28 insertions(+), 24 deletions(-) diff --git a/level_1/fl_fss/c/fss.c b/level_1/fl_fss/c/fss.c index 5288409..0144ec0 100644 --- a/level_1/fl_fss/c/fss.c +++ b/level_1/fl_fss/c/fss.c @@ -291,8 +291,8 @@ extern "C" { } #endif // _di_fl_fss_is_space_ -#ifndef _di_fl_fss_skip_past_whitespace_ - f_return_status fl_fss_skip_past_whitespace(const f_string_dynamic buffer, f_string_location *location) { +#ifndef _di_fl_fss_skip_past_space_ + f_return_status fl_fss_skip_past_space(const f_string_dynamic buffer, f_string_location *location) { #ifndef _di_level_1_parameter_checking_ if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter); if (location == 0) return f_status_set_error(f_invalid_parameter); @@ -310,7 +310,7 @@ extern "C" { max_width = buffer.used - location->start; } - while (buffer.string[location->start] == f_string_eos || (status = f_utf_is_whitespace(buffer.string + location->start, max_width)) == f_true) { + while (buffer.string[location->start] == f_string_eos || (status = f_utf_is_whitespace(buffer.string + location->start, max_width)) == f_true || (status = f_utf_is_control(buffer.string + location->start, max_width)) == f_true) { if (f_status_is_error(status)) { return status; } @@ -349,10 +349,10 @@ extern "C" { return f_none; } -#endif // 
_di_fl_fss_skip_past_space_ -#ifndef _di_fl_fss_skip_past_all_whitespace_ - f_return_status fl_fss_skip_past_all_whitespace(const f_string_dynamic buffer, f_string_location *location) { +#ifndef _di_fl_fss_skip_past_non_graph_ + f_return_status fl_fss_skip_past_non_graph(const f_string_dynamic buffer, f_string_location *location) { #ifndef _di_level_1_parameter_checking_ if (buffer.used <= 0) return f_status_set_error(f_invalid_parameter); if (location == 0) return f_status_set_error(f_invalid_parameter); @@ -370,10 +370,8 @@ extern "C" { max_width = buffer.used - location->start; } - while (buffer.string[location->start] == f_string_eos || (status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false) { - if (f_status_is_error(status)) { - return status; - } + while (buffer.string[location->start] == f_string_eos || ((status = f_utf_is_graph(buffer.string + location->start, max_width)) == f_false && (status = f_utf_is_zero_width(buffer.string + location->start, max_width)) == f_false)) { + if (f_status_is_error(status)) return status; width = f_macro_utf_byte_width_is(buffer.string[location->start]); @@ -407,7 +405,7 @@ extern "C" { return f_none; } -#endif // _di_fl_fss_skip_past_all_whitespace_ +#endif // _di_fl_fss_skip_past_non_graph_ #ifndef _di_fl_fss_shift_delimiters_ f_return_status fl_fss_shift_delimiters(f_string_dynamic *buffer, const f_string_location location) { diff --git a/level_1/fl_fss/c/fss.h b/level_1/fl_fss/c/fss.h index 04e027a..2c4efbb 100644 --- a/level_1/fl_fss/c/fss.h +++ b/level_1/fl_fss/c/fss.h @@ -192,6 +192,9 @@ extern "C" { /** * Skip past all whitespace and control characters, except newline. * + * Zero-width characters are not skipped because they might be part of a graph character, such as combining characters. + * @todo needs consideration on how to handle zero-width before space/control vs zero-width before graph. + * * @param buffer * The string to process. 
* @param location @@ -202,12 +205,15 @@ extern "C" { * f_none on success. * f_invalid_parameter (with error bit) if a parameter is invalid. */ -#ifndef _di_fl_fss_skip_past_whitespace_ - extern f_return_status fl_fss_skip_past_whitespace(const f_string_dynamic buffer, f_string_location *location); -#endif // _di_fl_fss_skip_past_whitespace_ +#ifndef _di_fl_fss_skip_past_space_ + extern f_return_status fl_fss_skip_past_space(const f_string_dynamic buffer, f_string_location *location); +#endif // _di_fl_fss_skip_past_space_ /** - * Skip past all whitespace and control characters. + * Skip past all non-graph and non-zero-width characters (whitespace and control characters). + * + * Zero-width characters are not skipped because they might be part of a graph character, such as combining characters. + * @todo needs consideration on how to handle zero-width before space/control vs zero-width before graph. * * @param buffer * The string to process. @@ -219,9 +225,9 @@ extern "C" { * f_none on success. * f_invalid_parameter (with error bit) if a parameter is invalid. 
*/ -#ifndef _di_fl_fss_skip_past_all_whitespace_ - extern f_return_status fl_fss_skip_past_all_whitespace(const f_string_dynamic buffer, f_string_location *location); -#endif // _di_fl_fss_skip_past_all_whitespace_ +#ifndef _di_fl_fss_skip_past_non_graph_ + extern f_return_status fl_fss_skip_past_non_graph(const f_string_dynamic buffer, f_string_location *location); +#endif // _di_fl_fss_skip_past_non_graph_ #ifdef __cplusplus } // extern "C" diff --git a/level_1/fl_fss/c/fss_basic.c b/level_1/fl_fss/c/fss_basic.c index aeda4dc..0248a5e 100644 --- a/level_1/fl_fss/c/fss_basic.c +++ b/level_1/fl_fss/c/fss_basic.c @@ -21,7 +21,7 @@ extern "C" { // delimits must only be applied once a valid object is found f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_object_return_on_overflow((*buffer), (*location), (*found), delimits, f_no_data_on_eos, f_no_data_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders @@ -385,7 +385,7 @@ extern "C" { // delimits must only be applied once a valid object is found f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_content_return_on_overflow((*buffer), (*location), (*found), delimits, f_none_on_eos, f_none_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders diff --git a/level_1/fl_fss/c/fss_basic_list.c b/level_1/fl_fss/c/fss_basic_list.c index bb3a1ec..576e42b 100644 --- a/level_1/fl_fss/c/fss_basic_list.c +++ b/level_1/fl_fss/c/fss_basic_list.c @@ -21,7 +21,7 @@ extern "C" { // delimits must only be applied once a valid object is found. 
f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_object_return_on_overflow((*buffer), (*location), (*found), delimits, f_no_data_on_eos, f_no_data_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders. diff --git a/level_1/fl_fss/c/fss_extended.c b/level_1/fl_fss/c/fss_extended.c index b19bd98..77ca674 100644 --- a/level_1/fl_fss/c/fss_extended.c +++ b/level_1/fl_fss/c/fss_extended.c @@ -21,7 +21,7 @@ extern "C" { // delimits must only be applied once a valid object is found f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_object_return_on_overflow((*buffer), (*location), (*found), delimits, f_no_data_on_eos, f_no_data_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders @@ -377,7 +377,7 @@ extern "C" { // delimits must only be applied once a valid object is found f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_content_return_on_overflow((*buffer), (*location), (*found), delimits, f_none_on_eos, f_none_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders diff --git a/level_1/fl_fss/c/fss_extended_list.c b/level_1/fl_fss/c/fss_extended_list.c index b0c1e25..87415e2 100644 --- a/level_1/fl_fss/c/fss_extended_list.c +++ b/level_1/fl_fss/c/fss_extended_list.c @@ -21,7 +21,7 @@ extern "C" { // delimits must only be applied once a valid object is found. 
f_string_lengths delimits = f_string_lengths_initialize; - fl_fss_skip_past_whitespace(*buffer, location); + fl_fss_skip_past_space(*buffer, location); fl_macro_fss_object_return_on_overflow((*buffer), (*location), (*found), delimits, f_no_data_on_eos, f_no_data_on_stop) // return found nothing if this line only contains whitespace and delimit placeholders. -- 1.8.3.1