From 48f47eec24dcac6822ddf4a8230bfc70c8334f0c Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Sat, 22 May 2021 23:12:59 -0500 Subject: [PATCH] Feature: Support Unicode format in byte_dump. --- level_3/byte_dump/c/byte_dump.c | 7 + level_3/byte_dump/c/byte_dump.h | 6 +- level_3/byte_dump/c/private-byte_dump.c | 313 ++++++++++++++++++++++---------- 3 files changed, 227 insertions(+), 99 deletions(-) diff --git a/level_3/byte_dump/c/byte_dump.c b/level_3/byte_dump/c/byte_dump.c index e41fcee..cc6591d 100644 --- a/level_3/byte_dump/c/byte_dump.c +++ b/level_3/byte_dump/c/byte_dump.c @@ -28,6 +28,7 @@ extern "C" { fll_program_print_help_option(output, context, byte_dump_short_duodecimal, byte_dump_long_duodecimal, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Display duodecimal representation."); fll_program_print_help_option(output, context, byte_dump_short_hexidecimal, byte_dump_long_hexidecimal, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "Display hexadecimal representation."); fll_program_print_help_option(output, context, byte_dump_short_octal, byte_dump_long_octal, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Display octal representation."); + fll_program_print_help_option(output, context, byte_dump_short_unicode, byte_dump_long_unicode, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Display using Unicode representation for valid Unicode (like: U+0000)."); fprintf(output.stream, "%c", f_string_eol_s[0]); @@ -72,6 +73,12 @@ extern "C" { fprintf(output.stream, "%c%c", f_string_eol_s[0], f_string_eol_s[0]); + fprintf(output.stream, " When using the "); + f_color_print(output.stream, context.set.notable, "%s%s", f_console_symbol_long_enable_s, byte_dump_long_unicode); + fprintf(output.stream, " option, invalid Unicode will fallback to being displayed using one of the other modes."); + + fprintf(output.stream, "%c%c", f_string_eol_s[0], f_string_eol_s[0]); + return F_none; } #endif // 
_di_byte_dump_print_help_ diff --git a/level_3/byte_dump/c/byte_dump.h b/level_3/byte_dump/c/byte_dump.h index a45b1be..98e8241 100644 --- a/level_3/byte_dump/c/byte_dump.h +++ b/level_3/byte_dump/c/byte_dump.h @@ -125,6 +125,7 @@ extern "C" { #define byte_dump_short_duodecimal "D" #define byte_dump_short_hexidecimal "x" #define byte_dump_short_octal "o" + #define byte_dump_short_unicode "U" #define byte_dump_short_first "f" #define byte_dump_short_last "l" @@ -137,6 +138,7 @@ extern "C" { #define byte_dump_long_duodecimal "duodecimal" #define byte_dump_long_hexidecimal "hexidecimal" #define byte_dump_long_octal "octal" + #define byte_dump_long_unicode "unicode" #define byte_dump_long_first "first" // first offset byte size. #define byte_dump_long_last "last" // last offset byte size. @@ -165,6 +167,7 @@ extern "C" { byte_dump_parameter_duodecimal, byte_dump_parameter_hexidecimal, byte_dump_parameter_octal, + byte_dump_parameter_unicode, byte_dump_parameter_first, byte_dump_parameter_last, @@ -194,6 +197,7 @@ extern "C" { f_console_parameter_t_initialize(byte_dump_short_duodecimal, byte_dump_long_duodecimal, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(byte_dump_short_hexidecimal, byte_dump_long_hexidecimal, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(byte_dump_short_octal, byte_dump_long_octal, 0, 0, f_console_type_normal), \ + f_console_parameter_t_initialize(byte_dump_short_unicode, byte_dump_long_unicode, 0, 0, f_console_type_normal), \ f_console_parameter_t_initialize(byte_dump_short_first, byte_dump_long_first, 0, 1, f_console_type_normal), \ f_console_parameter_t_initialize(byte_dump_short_last, byte_dump_long_last, 0, 1, f_console_type_normal), \ f_console_parameter_t_initialize(byte_dump_short_width, byte_dump_long_width, 0, 1, f_console_type_normal), \ @@ -204,7 +208,7 @@ extern "C" { f_console_parameter_t_initialize(0, byte_dump_long_classic, 0, 0, f_console_type_normal), \ } - #define byte_dump_total_parameters 
22 + #define byte_dump_total_parameters 23 #endif // _di_byte_dump_defines_ #ifndef _di_byte_dump_main_t_ diff --git a/level_3/byte_dump/c/private-byte_dump.c b/level_3/byte_dump/c/private-byte_dump.c index 53eda39..889eed3 100644 --- a/level_3/byte_dump/c/private-byte_dump.c +++ b/level_3/byte_dump/c/private-byte_dump.c @@ -177,7 +177,11 @@ extern "C" { previous.invalid = 0; while (cell.column < main.width) { - if (main.mode == byte_dump_mode_hexidecimal) { + + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + else if (main.mode == byte_dump_mode_hexidecimal) { fprintf(main.output.stream, " "); } else if (main.mode == byte_dump_mode_duodecimal) { @@ -196,20 +200,35 @@ extern "C" { ++cell.column; if (cell.column < main.width) { - if (main.mode == byte_dump_mode_hexidecimal && cell.column % 8 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + if (!(cell.column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_duodecimal && cell.column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_hexidecimal) { + if (!(cell.column % 8)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_octal && cell.column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_duodecimal) { + if (!(cell.column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_binary && cell.column % 4 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_octal) { + if (!(cell.column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_decimal && cell.column % 6 == 0) 
{ - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_binary) { + if (!(cell.column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_decimal) { + if (!(cell.column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } } } // while @@ -274,12 +293,16 @@ extern "C" { if (!cell->column) { f_color_print(main.output.stream, main.context.set.notable, "%016X ", (uint64_t) cell->row); - if (*offset > 0) { + if (*offset) { uint8_t offset_to_print = *offset; // Pad the buffer with spaces to hide any skipped bytes (skipped via --first). - while (offset_to_print > 0 && cell->column < main.width) { - if (main.mode == byte_dump_mode_hexidecimal) { + while (offset_to_print && cell->column < main.width) { + + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + else if (main.mode == byte_dump_mode_hexidecimal) { fprintf(main.output.stream, " "); } else if (main.mode == byte_dump_mode_duodecimal) { @@ -299,20 +322,35 @@ extern "C" { ++cell->column; if (cell->column < main.width) { - if (main.mode == byte_dump_mode_hexidecimal && cell->column % 8 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + if (!(cell->column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_duodecimal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_hexidecimal) { + if (!(cell->column % 8)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_octal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_duodecimal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", 
f_string_space_s); + } } - else if (main.mode == byte_dump_mode_binary && cell->column % 4 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_octal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } - else if (main.mode == byte_dump_mode_decimal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else if (main.mode == byte_dump_mode_binary) { + if (!(cell->column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_decimal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } } } } @@ -320,82 +358,144 @@ extern "C" { } if (cell->column < main.width) { - if (main.mode == byte_dump_mode_hexidecimal) { - if (invalid[character_current]) { - f_color_print(main.output.stream, main.context.set.error, " %02x", (uint8_t) byte); - } - else { - fprintf(main.output.stream, " %02x", (uint8_t) byte); - } - } - else if (main.mode == byte_dump_mode_duodecimal) { - if (invalid[character_current]) { - fprintf(main.output.stream, "%s", main.context.error.string); - } + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found && !invalid[character_current]) { + if (byte_current == 1) { + uint32_t unicode = 0; - fprintf(main.output.stream, " %01d", byte / 144); + if (width_utf < 2) { - uint8_t current = (byte % 144) / 12; + // 1 == U+0000 -> U+007F + unicode = macro_f_utf_character_t_to_char_1(characters.string[character_current]) & 0x7f; + } + else if (width_utf == 2) { - if (current == 11) { - fprintf(main.output.stream, "%s", f_string_ascii_b_s); - } - else if (current == 10) { - fprintf(main.output.stream, "%s", f_string_ascii_a_s); - } - else { - fprintf(main.output.stream, "%01d", current); - } + // 2 == U+0080 -> U+07FF + unicode = (macro_f_utf_character_t_to_char_1(characters.string[character_current]) & 0x1f) << 6; + unicode |= 
macro_f_utf_character_t_to_char_2(characters.string[character_current]) & 0x3f; + } + else if (width_utf == 3) { - current = (byte % 144) % 12; + // 3 == U+0800 -> U+FFFF + unicode = (macro_f_utf_character_t_to_char_1(characters.string[character_current]) & 0xf) << 12; + unicode |= (macro_f_utf_character_t_to_char_2(characters.string[character_current]) & 0x3f) << 6; + unicode |= macro_f_utf_character_t_to_char_3(characters.string[character_current]) & 0x3f; + } + else if (width_utf == 4) { - if (current == 11) { - fprintf(main.output.stream, "%s", f_string_ascii_b_s); - } - else if (current == 10) { - fprintf(main.output.stream, "%s", f_string_ascii_a_s); + // 4 == U+10000 -> U+10FFFF + unicode = (macro_f_utf_character_t_to_char_1(characters.string[character_current]) & 0x7) << 18; + unicode |= (macro_f_utf_character_t_to_char_2(characters.string[character_current]) & 0x3f) << 12; + unicode |= (macro_f_utf_character_t_to_char_3(characters.string[character_current]) & 0x3f) << 6; + unicode |= macro_f_utf_character_t_to_char_4(characters.string[character_current]) & 0x3f; + } + + if (width_utf < 4) { + fprintf(main.output.stream, " U+%04x ", (uint32_t) unicode); + } + else { + fprintf(main.output.stream, " U+%06x", (uint32_t) unicode); + } } else { - fprintf(main.output.stream, "%01d", current); - } - if (invalid[character_current]) { - fprintf(main.output.stream, "%s", main.context.reset.string); + // Pad the characters that are incomplete fragments of an already printed valid Unicode. 
+ fprintf(main.output.stream, " "); } } - else if (main.mode == byte_dump_mode_octal) { - if (invalid[character_current]) { - f_color_print(main.output.stream, main.context.set.error, " %03o", (uint8_t) byte); - } - else { - fprintf(main.output.stream, " %03o", (uint8_t) byte); + else { + if (main.mode == byte_dump_mode_hexidecimal) { + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + + if (invalid[character_current]) { + f_color_print(main.output.stream, main.context.set.error, " %02x", (uint8_t) byte); + } + else { + fprintf(main.output.stream, " %02x", (uint8_t) byte); + } } - } - else if (main.mode == byte_dump_mode_binary) { - char binary_string[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + else if (main.mode == byte_dump_mode_duodecimal) { + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + + if (invalid[character_current]) { + fprintf(main.output.stream, "%s", main.context.error.string); + } + + fprintf(main.output.stream, " %01d", byte / 144); - binary_string[0] = ((byte >> 7) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[1] = ((byte >> 6) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[2] = ((byte >> 5) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[3] = ((byte >> 4) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[4] = ((byte >> 3) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[5] = ((byte >> 2) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[6] = ((byte >> 1) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; - binary_string[7] = (byte & 0x01) ? 
f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + uint8_t current = (byte % 144) / 12; - if (invalid[character_current]) { - f_color_print(main.output.stream, main.context.set.error, " %s", binary_string); + if (current == 11) { + fprintf(main.output.stream, "%s", f_string_ascii_b_s); + } + else if (current == 10) { + fprintf(main.output.stream, "%s", f_string_ascii_a_s); + } + else { + fprintf(main.output.stream, "%01d", current); + } + + current = (byte % 144) % 12; + + if (current == 11) { + fprintf(main.output.stream, "%s", f_string_ascii_b_s); + } + else if (current == 10) { + fprintf(main.output.stream, "%s", f_string_ascii_a_s); + } + else { + fprintf(main.output.stream, "%01d", current); + } + + if (invalid[character_current]) { + fprintf(main.output.stream, "%s", main.context.reset.string); + } } - else { - fprintf(main.output.stream, " %s", binary_string); + else if (main.mode == byte_dump_mode_octal) { + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + + if (invalid[character_current]) { + f_color_print(main.output.stream, main.context.set.error, " %03o", (uint8_t) byte); + } + else { + fprintf(main.output.stream, " %03o", (uint8_t) byte); + } } - } - else if (main.mode == byte_dump_mode_decimal) { - if (invalid[character_current]) { - f_color_print(main.output.stream, main.context.set.error, " %3d", (uint8_t) byte); + else if (main.mode == byte_dump_mode_binary) { + char binary_string[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + binary_string[0] = ((byte >> 7) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[1] = ((byte >> 6) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[2] = ((byte >> 5) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[3] = ((byte >> 4) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[4] = ((byte >> 3) & 0x01) ? 
f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[5] = ((byte >> 2) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[6] = ((byte >> 1) & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + binary_string[7] = (byte & 0x01) ? f_string_ascii_1_s[0] : f_string_ascii_0_s[0]; + + if (invalid[character_current]) { + f_color_print(main.output.stream, main.context.set.error, " %s", binary_string); + } + else { + fprintf(main.output.stream, " %s", binary_string); + } } - else { - fprintf(main.output.stream, " %3d", (uint8_t) byte); + else if (main.mode == byte_dump_mode_decimal) { + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + fprintf(main.output.stream, " "); + } + + if (invalid[character_current]) { + f_color_print(main.output.stream, main.context.set.error, " %3d", (uint8_t) byte); + } + else { + fprintf(main.output.stream, " %3d", (uint8_t) byte); + } } } @@ -430,20 +530,37 @@ extern "C" { previous->invalid = invalid[character_current]; } } - else if (main.mode == byte_dump_mode_hexidecimal && cell->column % 8 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); - } - else if (main.mode == byte_dump_mode_duodecimal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); - } - else if (main.mode == byte_dump_mode_octal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); - } - else if (main.mode == byte_dump_mode_binary && cell->column % 4 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); - } - else if (main.mode == byte_dump_mode_decimal && cell->column % 6 == 0) { - fprintf(main.output.stream, "%s", f_string_space_s); + else { + if (main.parameters[byte_dump_parameter_unicode].result == f_console_result_found) { + if (!(cell->column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_hexidecimal) { + if (!(cell->column % 8)) { + 
fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_duodecimal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_octal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_binary) { + if (!(cell->column % 4)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } + else if (main.mode == byte_dump_mode_decimal) { + if (!(cell->column % 6)) { + fprintf(main.output.stream, "%s", f_string_space_s); + } + } } return reset; -- 1.8.3.1