From: Kevin Day Date: Sat, 4 Dec 2021 23:03:56 +0000 (-0600) Subject: Update: Wrap up utf8 program. X-Git-Tag: 0.5.7~64 X-Git-Url: https://git.kevux.org/?a=commitdiff_plain;h=53c4bd99a0632ea0d34206de489698860072e018;p=fll Update: Wrap up utf8 program. This seems to be a good point to stop. The program is only intended to be simple. Complete the functionality and consider all future problems bugs. Some of the parameters are not used correctly. The strip-invalid is not being used. The verify is being used as strip-invalid (this is likely the result of the previously incomplete code being accidentally committed). Add a separate parameter to optionally separate by newlines when headers are not being printed. The verify should disable printing. The quiet verbosity should not hide printed headers as those are considered data. Remove redundant newline being printed when headers parameter is used. --- diff --git a/level_3/utf8/c/private-print.c b/level_3/utf8/c/private-print.c index 617c66c..64d067e 100644 --- a/level_3/utf8/c/private-print.c +++ b/level_3/utf8/c/private-print.c @@ -9,9 +9,7 @@ extern "C" { #ifndef _di_utf8_print_character_ void utf8_print_character(utf8_data_t * const data, const f_string_static_t character, const f_color_set_t set) { - if (data->main->parameters[utf8_parameter_strip_invalid].result == f_console_result_found || !character.used) { - return; - } + if (!character.used) return; if (data->mode & utf8_mode_to_binary_d) { fl_print_format("%s%[%r%]%s", data->file.stream, data->prepend, set, character, set, data->append); @@ -124,8 +122,8 @@ extern "C" { #ifndef _di_utf8_print_section_header_file_ void utf8_print_section_header_file(utf8_data_t * const data, const f_string_t name) { - if (data->main->output.verbosity == f_console_verbosity_quiet) return; if (data->main->parameters[utf8_parameter_headers].result == f_console_result_none) return; + if (data->main->parameters[utf8_parameter_verify].result == f_console_result_found) return; 
flockfile(data->main->output.to.stream); @@ -145,8 +143,8 @@ extern "C" { #ifndef _di_utf8_print_section_header_parameter_ void utf8_print_section_header_parameter(utf8_data_t * const data, const f_array_length_t index) { - if (data->main->output.verbosity == f_console_verbosity_quiet) return; if (data->main->parameters[utf8_parameter_headers].result == f_console_result_none) return; + if (data->main->parameters[utf8_parameter_verify].result == f_console_result_found) return; flockfile(data->main->output.to.stream); @@ -160,8 +158,8 @@ extern "C" { #ifndef _di_utf8_print_section_header_pipe_ void utf8_print_section_header_pipe(utf8_data_t * const data) { - if (data->main->output.verbosity == f_console_verbosity_quiet) return; if (data->main->parameters[utf8_parameter_headers].result == f_console_result_none) return; + if (data->main->parameters[utf8_parameter_verify].result == f_console_result_found) return; fll_print_format("%c%[Pipe%]:%c", data->main->output.to.stream, f_string_eol_s[0], data->main->output.set->title, data->main->output.set->title, f_string_eol_s[0]); } diff --git a/level_3/utf8/c/private-utf8.c b/level_3/utf8/c/private-utf8.c index d8f22c5..088944a 100644 --- a/level_3/utf8/c/private-utf8.c +++ b/level_3/utf8/c/private-utf8.c @@ -106,6 +106,7 @@ extern "C" { } else { mode_codepoint = utf8_codepoint_mode_bad_end; + valid = F_false; } character.used = 0; @@ -121,7 +122,11 @@ extern "C" { } if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { - f_print_terminated(f_string_eol_s, data->file.stream); + + // When headers are printed, they are printed with a newline so only print this newline when separate is used without headers being printed. 
+ if (data->main->parameters[utf8_parameter_headers].result == f_console_result_none && data->main->parameters[utf8_parameter_separate].result == f_console_result_found) { + f_print_terminated(f_string_eol_s, data->file.stream); + } } funlockfile(data->file.stream); diff --git a/level_3/utf8/c/private-utf8_binary.c b/level_3/utf8/c/private-utf8_binary.c index 718c2f1..4c43ebe 100644 --- a/level_3/utf8/c/private-utf8_binary.c +++ b/level_3/utf8/c/private-utf8_binary.c @@ -13,6 +13,7 @@ extern "C" { f_status_t utf8_convert_binary(utf8_data_t * const data, const f_string_static_t character) { f_status_t status = F_none; + bool valid_not = F_false; uint32_t codepoint = 0; @@ -24,11 +25,17 @@ extern "C" { } if (F_status_is_error(status)) { - if (F_status_set_fine(status) == F_failure || F_status_set_fine(status) == F_utf) { - utf8_print_character(data, character, data->valid_not); + if (F_status_set_fine(status) == F_failure || F_status_set_fine(status) == F_utf || F_status_set_fine(status) == F_valid_not) { + valid_not = F_true; + + if (data->main->parameters[utf8_parameter_strip_invalid].result == f_console_result_none && data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { + utf8_print_character(data, character, data->valid_not); + } } else { - utf8_print_error_decode(data, status, character); + if (data->main->parameters[utf8_parameter_strip_invalid].result == f_console_result_none && data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { + utf8_print_error_decode(data, status, character); + } return status; } @@ -42,7 +49,7 @@ extern "C" { } } - if (F_status_is_error(status)) { + if (valid_not || F_status_is_error(status)) { return F_utf; } diff --git a/level_3/utf8/c/private-utf8_codepoint.c b/level_3/utf8/c/private-utf8_codepoint.c index 849c937..2af6c7d 100644 --- a/level_3/utf8/c/private-utf8_codepoint.c +++ b/level_3/utf8/c/private-utf8_codepoint.c @@ -13,6 +13,7 @@ extern "C" { f_status_t 
utf8_convert_codepoint(utf8_data_t * const data, const f_string_static_t character, uint8_t *mode) { f_status_t status = F_none; + bool valid_not = F_false; if (*mode != utf8_codepoint_mode_end) { if (data->text.used + character.used >= data->text.size) { @@ -35,14 +36,22 @@ extern "C" { status = f_utf_unicode_string_to(data->text.string, data->text.used, &codepoint); if (F_status_is_error(status)) { - if (F_status_set_fine(status) == F_failure || F_status_set_fine(status) == F_utf) { - utf8_print_character(data, data->text, data->valid_not); + if (F_status_set_fine(status) == F_failure || F_status_set_fine(status) == F_utf || F_status_set_fine(status) == F_valid_not) { + valid_not = F_true; + + if (data->main->parameters[utf8_parameter_strip_invalid].result == f_console_result_none && data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { + fl_print_format("%s%[%r%]%s", data->file.stream, data->prepend, data->valid_not, data->text, data->valid_not, data->append); + } } else { - utf8_print_error_decode(data, status, character); + if (data->main->parameters[utf8_parameter_strip_invalid].result == f_console_result_none && data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { + utf8_print_error_decode(data, status, character); + } + + return status; } } - else { + else if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { if (data->mode & utf8_mode_to_binary_d) { char byte[5] = { 0, 0, 0, 0, 0 }; f_string_static_t text = f_string_static_t_initialize; @@ -69,12 +78,18 @@ extern "C" { else { status = F_none; - utf8_print_character(data, data->text, data->valid_not); + if (data->main->parameters[utf8_parameter_verify].result == f_console_result_none) { + fl_print_format("%s%[%r%]%s", data->file.stream, data->prepend, data->valid_not, data->text, data->valid_not, data->append); + } } *mode = utf8_codepoint_mode_ready; data->text.used = 0; + if (valid_not || F_status_is_error(status)) { + 
return F_utf; + } + return status; } #endif // _di_utf8_convert_codepoint_ diff --git a/level_3/utf8/c/utf8.c b/level_3/utf8/c/utf8.c index a94eef7..c602411 100644 --- a/level_3/utf8/c/utf8.c +++ b/level_3/utf8/c/utf8.c @@ -41,7 +41,8 @@ extern "C" { f_print_character(f_string_eol_s[0], file.stream); fll_program_print_help_option(file, context, utf8_short_headers_s, utf8_long_headers_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Print headers for each section (pipe, file, or parameter)."); - fll_program_print_help_option(file, context, utf8_short_strip_invalid_s, utf8_long_strip_invalid_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "Strip invalid Unicode characters."); + fll_program_print_help_option(file, context, utf8_short_separate_s, utf8_long_separate_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Separate characters by newlines (implied when printing headers)."); + fll_program_print_help_option(file, context, utf8_short_strip_invalid_s, utf8_long_strip_invalid_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "Strip invalid Unicode characters (do not print invalid sequences)."); fll_program_print_help_option(file, context, utf8_short_verify_s, utf8_long_verify_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Only perform verification of valid sequences."); f_print_character(f_string_eol_s[0], file.stream); @@ -53,7 +54,7 @@ extern "C" { fl_print_format(" Multiple input sources are allowed but only a single output destination is allowed.%c%c", file.stream, f_string_eol_s[0], f_string_eol_s[0]); - fl_print_format(" When using the parameter '%[%s%s%]', only invalid data is printed and 0 is returned if valid or 1 is returned if invalid.%c", file.stream, context.set.notable, f_console_symbol_long_enable_s, utf8_long_verify_s, context.set.notable, f_string_eol_s[0]); + fl_print_format(" When using the parameter '%[%s%s%]', no data is printed and 0 is returned 
if valid or 1 is returned if invalid.%c", file.stream, context.set.notable, f_console_symbol_long_enable_s, utf8_long_verify_s, context.set.notable, f_string_eol_s[0]); funlockfile(file.stream); @@ -251,14 +252,6 @@ extern "C" { } } - if (main->parameters[utf8_parameter_verify].result == f_console_result_found) { - if (main->parameters[utf8_parameter_strip_invalid].result == f_console_result_found) { - utf8_print_error_parameter_conflict(&data, utf8_long_verify_s, utf8_long_strip_invalid_s); - - status = F_status_set_error(F_parameter); - } - } - if (F_status_is_error_not(status)) { if (main->parameters[utf8_parameter_from_file].result == f_console_result_additional) { f_array_length_t i = 0; @@ -338,14 +331,8 @@ extern "C" { } if (data.mode & utf8_mode_to_codepoint_d) { - if (main->parameters[utf8_parameter_verify].result == f_console_result_found) { - if (main->parameters[utf8_parameter_headers].result == f_console_result_found) { - data.prepend = " "; - } - else { - data.prepend = f_string_space_s; - } - + if (main->parameters[utf8_parameter_separate].result == f_console_result_found || main->parameters[utf8_parameter_headers].result == f_console_result_found) { + data.prepend = " "; data.append = f_string_eol_s; } else { @@ -453,7 +440,7 @@ extern "C" { } } - if (main->output.verbosity != f_console_verbosity_quiet) { + if (main->output.verbosity != f_console_verbosity_quiet && main->parameters[utf8_parameter_verify].result == f_console_result_none) { if (F_status_set_fine(status) == F_interrupt) { fflush(data.file.stream); diff --git a/level_3/utf8/c/utf8.h b/level_3/utf8/c/utf8.h index 4bbf067..7339684 100644 --- a/level_3/utf8/c/utf8.h +++ b/level_3/utf8/c/utf8.h @@ -112,6 +112,7 @@ extern "C" { #define utf8_short_from_file_s "f" #define utf8_short_headers_s "H" + #define utf8_short_separate_s "S" #define utf8_short_strip_invalid_s "s" #define utf8_short_verify_s "v" @@ -124,6 +125,7 @@ extern "C" { #define utf8_long_from_file_s "from_file" #define 
utf8_long_headers_s "headers" + #define utf8_long_separate_s "separate" #define utf8_long_strip_invalid_s "strip_invalid" #define utf8_long_verify_s "verify" @@ -147,6 +149,7 @@ extern "C" { utf8_parameter_from_file, utf8_parameter_headers, + utf8_parameter_separate, utf8_parameter_strip_invalid, utf8_parameter_to_binary, @@ -171,6 +174,7 @@ extern "C" { f_console_parameter_t_initialize(utf8_short_from_codepoint_s, utf8_long_from_codepoint_s, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(utf8_short_from_file_s, utf8_long_from_file_s, 0, 1, f_console_type_normal), \ f_console_parameter_t_initialize(utf8_short_headers_s, utf8_long_headers_s, 0, 0, f_console_type_normal), \ + f_console_parameter_t_initialize(utf8_short_separate_s, utf8_long_separate_s, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(utf8_short_strip_invalid_s, utf8_long_strip_invalid_s, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(utf8_short_to_binary_s, utf8_long_to_binary_s, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(utf8_short_to_codepoint_s, utf8_long_to_codepoint_s, 0, 0, f_console_type_normal), \ @@ -178,7 +182,7 @@ extern "C" { f_console_parameter_t_initialize(utf8_short_verify_s, utf8_long_verify_s, 0, 0, f_console_type_normal), \ } - #define utf8_total_parameters_d 18 + #define utf8_total_parameters_d 19 #endif // _di_utf8_defines_ /**