The term "bytecode" already exists and is used for a slightly different purpose (representing compiled or partially compiled data).
This is a different context.
To avoid using the term improperly, switch to the more appropriate term "bytesequence" (as one word).
A byte sequence is a term representing a sequence of bytes.
This is more specific than binary and effectively emphasizes that this is in regard to bytes.
Avoiding the term binary, however correct or not the term may be, helps avoid confusion due to "binary" and "text" data being considered two separate things.
#endif // _di_utf8_defines_
#ifndef _di_utf8_parameters_
- const f_string_static_t utf8_short_from_bytecode_s = macro_f_string_static_t_initialize(UTF8_short_from_bytecode_s, 0, UTF8_short_from_bytecode_s_length);
+ const f_string_static_t utf8_short_from_bytesequence_s = macro_f_string_static_t_initialize(UTF8_short_from_bytesequence_s, 0, UTF8_short_from_bytesequence_s_length);
const f_string_static_t utf8_short_from_codepoint_s = macro_f_string_static_t_initialize(UTF8_short_from_codepoint_s, 0, UTF8_short_from_codepoint_s_length);
const f_string_static_t utf8_short_from_file_s = macro_f_string_static_t_initialize(UTF8_short_from_file_s, 0, UTF8_short_from_file_s_length);
const f_string_static_t utf8_short_strip_invalid_s = macro_f_string_static_t_initialize(UTF8_short_strip_invalid_s, 0, UTF8_short_strip_invalid_s_length);
const f_string_static_t utf8_short_verify_s = macro_f_string_static_t_initialize(UTF8_short_verify_s, 0, UTF8_short_verify_s_length);
- const f_string_static_t utf8_short_to_bytecode_s = macro_f_string_static_t_initialize(UTF8_short_to_bytecode_s, 0, UTF8_short_to_bytecode_s_length);
+ const f_string_static_t utf8_short_to_bytesequence_s = macro_f_string_static_t_initialize(UTF8_short_to_bytesequence_s, 0, UTF8_short_to_bytesequence_s_length);
const f_string_static_t utf8_short_to_codepoint_s = macro_f_string_static_t_initialize(UTF8_short_to_codepoint_s, 0, UTF8_short_to_codepoint_s_length);
const f_string_static_t utf8_short_to_combining_s = macro_f_string_static_t_initialize(UTF8_short_to_combining_s, 0, UTF8_short_to_combining_s_length);
const f_string_static_t utf8_short_to_file_s = macro_f_string_static_t_initialize(UTF8_short_to_file_s, 0, UTF8_short_to_file_s_length);
const f_string_static_t utf8_short_to_width_s = macro_f_string_static_t_initialize(UTF8_short_to_width_s, 0, UTF8_short_to_width_s_length);
- const f_string_static_t utf8_long_from_bytecode_s = macro_f_string_static_t_initialize(UTF8_long_from_bytecode_s, 0, UTF8_long_from_bytecode_s_length);
+ const f_string_static_t utf8_long_from_bytesequence_s = macro_f_string_static_t_initialize(UTF8_long_from_bytesequence_s, 0, UTF8_long_from_bytesequence_s_length);
const f_string_static_t utf8_long_from_codepoint_s = macro_f_string_static_t_initialize(UTF8_long_from_codepoint_s, 0, UTF8_long_from_codepoint_s_length);
const f_string_static_t utf8_long_from_file_s = macro_f_string_static_t_initialize(UTF8_long_from_file_s, 0, UTF8_long_from_file_s_length);
const f_string_static_t utf8_long_strip_invalid_s = macro_f_string_static_t_initialize(UTF8_long_strip_invalid_s, 0, UTF8_long_strip_invalid_s_length);
const f_string_static_t utf8_long_verify_s = macro_f_string_static_t_initialize(UTF8_long_verify_s, 0, UTF8_long_verify_s_length);
- const f_string_static_t utf8_long_to_bytecode_s = macro_f_string_static_t_initialize(UTF8_long_to_bytecode_s, 0, UTF8_long_to_bytecode_s_length);
+ const f_string_static_t utf8_long_to_bytesequence_s = macro_f_string_static_t_initialize(UTF8_long_to_bytesequence_s, 0, UTF8_long_to_bytesequence_s_length);
const f_string_static_t utf8_long_to_codepoint_s = macro_f_string_static_t_initialize(UTF8_long_to_codepoint_s, 0, UTF8_long_to_codepoint_s_length);
const f_string_static_t utf8_long_to_combining_s = macro_f_string_static_t_initialize(UTF8_long_to_combining_s, 0, UTF8_long_to_combining_s_length);
const f_string_static_t utf8_long_to_file_s = macro_f_string_static_t_initialize(UTF8_long_to_file_s, 0, UTF8_long_to_file_s_length);
* The main program parameters.
*/
#ifndef _di_utf8_parameters_
- #define UTF8_short_from_bytecode_s "b"
- #define UTF8_short_from_codepoint_s "c"
- #define UTF8_short_from_file_s "f"
+ #define UTF8_short_from_bytesequence_s "b"
+ #define UTF8_short_from_codepoint_s "c"
+ #define UTF8_short_from_file_s "f"
#define UTF8_short_headers_s "H"
#define UTF8_short_separate_s "S"
#define UTF8_short_strip_invalid_s "s"
#define UTF8_short_verify_s "v"
- #define UTF8_short_to_bytecode_s "B"
- #define UTF8_short_to_codepoint_s "C"
- #define UTF8_short_to_combining_s "O"
- #define UTF8_short_to_file_s "F"
- #define UTF8_short_to_width_s "W"
+ #define UTF8_short_to_bytesequence_s "B"
+ #define UTF8_short_to_codepoint_s "C"
+ #define UTF8_short_to_combining_s "O"
+ #define UTF8_short_to_file_s "F"
+ #define UTF8_short_to_width_s "W"
- #define UTF8_long_from_bytecode_s "from_bytecode"
- #define UTF8_long_from_codepoint_s "from_codepoint"
- #define UTF8_long_from_file_s "from_file"
+ #define UTF8_long_from_bytesequence_s "from_bytesequence"
+ #define UTF8_long_from_codepoint_s "from_codepoint"
+ #define UTF8_long_from_file_s "from_file"
#define UTF8_long_headers_s "headers"
#define UTF8_long_separate_s "separate"
#define UTF8_long_strip_invalid_s "strip_invalid"
#define UTF8_long_verify_s "verify"
- #define UTF8_long_to_bytecode_s "to_bytecode"
- #define UTF8_long_to_codepoint_s "to_codepoint"
- #define UTF8_long_to_combining_s "to_combining"
- #define UTF8_long_to_file_s "to_file"
- #define UTF8_long_to_width_s "to_width"
+ #define UTF8_long_to_bytesequence_s "to_bytesequence"
+ #define UTF8_long_to_codepoint_s "to_codepoint"
+ #define UTF8_long_to_combining_s "to_combining"
+ #define UTF8_long_to_file_s "to_file"
+ #define UTF8_long_to_width_s "to_width"
- #define UTF8_short_from_bytecode_s_length 1
+ #define UTF8_short_from_bytesequence_s_length 1
#define UTF8_short_from_codepoint_s_length 1
#define UTF8_short_from_file_s_length 1
#define UTF8_short_strip_invalid_s_length 1
#define UTF8_short_verify_s_length 1
- #define UTF8_short_to_bytecode_s_length 1
+ #define UTF8_short_to_bytesequence_s_length 1
#define UTF8_short_to_codepoint_s_length 1
#define UTF8_short_to_combining_s_length 1
#define UTF8_short_to_file_s_length 1
#define UTF8_short_to_width_s_length 1
- #define UTF8_long_from_bytecode_s_length 13
- #define UTF8_long_from_codepoint_s_length 14
- #define UTF8_long_from_file_s_length 9
+ #define UTF8_long_from_bytesequence_s_length 17
+ #define UTF8_long_from_codepoint_s_length 14
+ #define UTF8_long_from_file_s_length 9
#define UTF8_long_headers_s_length 7
#define UTF8_long_separate_s_length 8
#define UTF8_long_strip_invalid_s_length 13
#define UTF8_long_verify_s_length 6
- #define UTF8_long_to_bytecode_s_length 11
- #define UTF8_long_to_codepoint_s_length 12
- #define UTF8_long_to_combining_s_length 12
- #define UTF8_long_to_file_s_length 7
- #define UTF8_long_to_width_s_length 8
+ #define UTF8_long_to_bytesequence_s_length 15
+ #define UTF8_long_to_codepoint_s_length 12
+ #define UTF8_long_to_combining_s_length 12
+ #define UTF8_long_to_file_s_length 7
+ #define UTF8_long_to_width_s_length 8
- extern const f_string_static_t utf8_short_from_bytecode_s;
+ extern const f_string_static_t utf8_short_from_bytesequence_s;
extern const f_string_static_t utf8_short_from_codepoint_s;
extern const f_string_static_t utf8_short_from_file_s;
extern const f_string_static_t utf8_short_strip_invalid_s;
extern const f_string_static_t utf8_short_verify_s;
- extern const f_string_static_t utf8_short_to_bytecode_s;
+ extern const f_string_static_t utf8_short_to_bytesequence_s;
extern const f_string_static_t utf8_short_to_codepoint_s;
extern const f_string_static_t utf8_short_to_combining_s;
extern const f_string_static_t utf8_short_to_file_s;
extern const f_string_static_t utf8_short_to_width_s;
- extern const f_string_static_t utf8_long_from_bytecode_s;
+ extern const f_string_static_t utf8_long_from_bytesequence_s;
extern const f_string_static_t utf8_long_from_codepoint_s;
extern const f_string_static_t utf8_long_from_file_s;
extern const f_string_static_t utf8_long_strip_invalid_s;
extern const f_string_static_t utf8_long_verify_s;
- extern const f_string_static_t utf8_long_to_bytecode_s;
+ extern const f_string_static_t utf8_long_to_bytesequence_s;
extern const f_string_static_t utf8_long_to_codepoint_s;
extern const f_string_static_t utf8_long_to_combining_s;
extern const f_string_static_t utf8_long_to_file_s;
utf8_parameter_verbosity_debug_e,
utf8_parameter_version_e,
- utf8_parameter_from_bytecode_e,
+ utf8_parameter_from_bytesequence_e,
utf8_parameter_from_codepoint_e,
utf8_parameter_from_file_e,
utf8_parameter_separate_e,
utf8_parameter_strip_invalid_e,
- utf8_parameter_to_bytecode_e,
+ utf8_parameter_to_bytesequence_e,
utf8_parameter_to_codepoint_e,
utf8_parameter_to_combining_e,
utf8_parameter_to_file_e,
macro_f_console_parameter_t_initialize(f_console_standard_short_verbose_s.string, f_console_standard_long_verbose_s.string, 0, 0, f_console_type_inverse_e), \
macro_f_console_parameter_t_initialize(f_console_standard_short_debug_s.string, f_console_standard_long_debug_s.string, 0, 0, f_console_type_inverse_e), \
macro_f_console_parameter_t_initialize(f_console_standard_short_version_s.string, f_console_standard_long_version_s.string, 0, 0, f_console_type_inverse_e), \
- macro_f_console_parameter_t_initialize(utf8_short_from_bytecode_s.string, utf8_long_from_bytecode_s.string, 0, 0, f_console_type_normal_e), \
+ macro_f_console_parameter_t_initialize(utf8_short_from_bytesequence_s.string, utf8_long_from_bytesequence_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_from_codepoint_s.string, utf8_long_from_codepoint_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_from_file_s.string, utf8_long_from_file_s.string, 0, 1, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_headers_s.string, utf8_long_headers_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_separate_s.string, utf8_long_separate_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_strip_invalid_s.string, utf8_long_strip_invalid_s.string, 0, 0, f_console_type_normal_e), \
- macro_f_console_parameter_t_initialize(utf8_short_to_bytecode_s.string, utf8_long_to_bytecode_s.string, 0, 0, f_console_type_normal_e), \
+ macro_f_console_parameter_t_initialize(utf8_short_to_bytesequence_s.string, utf8_long_to_bytesequence_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_to_codepoint_s.string, utf8_long_to_codepoint_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_to_combining_s.string, utf8_long_to_combining_s.string, 0, 0, f_console_type_normal_e), \
macro_f_console_parameter_t_initialize(utf8_short_to_file_s.string, utf8_long_to_file_s.string, 0, 1, f_console_type_normal_e), \
* Modes used to designate how the input and output are to be processed.
*
* utf8_mode_from_*:
- * - bytecode: The input format is bytecode.
+ * - bytesequence: The input format is bytesequence.
* - codepoint: The input format is codepoint (U+XXXX or U+XXXXXX).
*
* utf8_mode_to_*:
- * - bytecode: The outout format is bytecode.
+ * - bytesequence: The output format is bytesequence.
* - codepoint: The output format is codepoint (U+XXXX or U+XXXXXX).
* - combining: The output format is whether or not character is combining (may be used with "width").
* - width: The output format is how wide the character is (may be used with "combining").
*/
#ifndef _di_utf8_modes_
- #define utf8_mode_from_bytecode_d 0x1
+ #define utf8_mode_from_bytesequence_d 0x1
#define utf8_mode_from_codepoint_d 0x2
- #define utf8_mode_to_bytecode_d 0x4
+ #define utf8_mode_to_bytesequence_d 0x4
#define utf8_mode_to_codepoint_d 0x8
#define utf8_mode_to_combining_d 0x10
#define utf8_mode_to_width_d 0x20
0, \
0, \
f_file_t_initialize, \
- utf8_mode_from_bytecode_d | utf8_mode_to_codepoint_d, \
+ utf8_mode_from_bytesequence_d | utf8_mode_to_codepoint_d, \
f_color_set_t_initialize, \
f_color_set_t_initialize, \
f_string_static_t_initialize, \
extern "C" {
#endif
-#ifndef _di_utf8_print_bytecode_
- void utf8_print_bytecode(utf8_data_t * const data, const f_string_static_t character) {
+#ifndef _di_utf8_print_bytesequence_
+ void utf8_print_bytesequence(utf8_data_t * const data, const f_string_static_t character) {
fl_print_format("%r%r%r", data->file.stream, data->prepend, character, data->append);
}
-#endif // _di_utf8_print_bytecode_
+#endif // _di_utf8_print_bytesequence_
#ifndef _di_utf8_print_character_invalid_
void utf8_print_character_invalid(utf8_data_t * const data, const f_string_static_t character) {
if ((data->mode & utf8_mode_to_combining_d) || (data->mode & utf8_mode_to_width_d)) {
utf8_print_combining_or_width(data, character);
}
- else if (data->mode & utf8_mode_to_bytecode_d) {
+ else if (data->mode & utf8_mode_to_bytesequence_d) {
fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, character, data->valid_not, data->append);
}
else if (data->mode & utf8_mode_from_codepoint_d) {
}
#endif // _di_utf8_print_error_parameter_file_to_too_many_
-#ifndef _di_utf8_print_raw_bytecode_
- void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) {
+#ifndef _di_utf8_print_raw_bytesequence_
+ void utf8_print_raw_bytesequence(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) {
if (data->main->parameters.array[utf8_parameter_strip_invalid_e].result == f_console_result_found_e) return;
if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_found_e) return;
fl_print_format("%r%[%r%]%r", data->file.stream, data->prepend, data->valid_not, character, data->valid_not, data->append);
}
-#endif // _di_utf8_print_raw_bytecode_
+#endif // _di_utf8_print_raw_bytesequence_
#ifndef _di_utf8_print_raw_codepoint_
void utf8_print_raw_codepoint(utf8_data_t * const data, const f_string_static_t raw) {
#endif
/**
- * Print the bytecode character (such as '豸').
+ * Print the bytesequence character (such as '豸').
*
* @param data
* The program data.
* The character to print.
* This is a string that represents a single character.
*/
-#ifndef _di_utf8_print_bytecode_
- extern void utf8_print_bytecode(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d;
-#endif // _di_utf8_print_bytecode_
+#ifndef _di_utf8_print_bytesequence_
+ extern void utf8_print_bytesequence(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_bytesequence_
/**
- * Print an invalid character either as a Unicode codeblock or as a bytecode.
+ * Print an invalid character either as a Unicode codeblock or as a bytesequence.
*
* This handles whether or not the invalid character should be printed or not based on program parameters.
*
#endif // _di_utf8_print_error_parameter_file_to_too_many_
/**
- * Print the raw character data (binary / bytecode).
+ * Print the raw character data (binary / bytesequence).
*
* @param data
* The program data.
* @param width
* The width the raw character represents (a value inclusively from 1 to 4).
*/
-#ifndef _di_utf8_print_raw_bytecode_
- extern void utf8_print_raw_bytecode(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) F_attribute_visibility_internal_d;
-#endif // _di_utf8_print_raw_bytecode_
+#ifndef _di_utf8_print_raw_bytesequence_
+ extern void utf8_print_raw_bytesequence(utf8_data_t * const data, const f_utf_char_t raw, const uint8_t width) F_attribute_visibility_internal_d;
+#endif // _di_utf8_print_raw_bytesequence_
/**
* Print the raw character data (codepoint).
#include "private-common.h"
#include "private-print.h"
#include "private-utf8.h"
-#include "private-utf8_bytecode.h"
+#include "private-utf8_bytesequence.h"
#include "private-utf8_codepoint.h"
#ifdef __cplusplus
status = F_none;
- if (data->mode & utf8_mode_from_bytecode_d) {
- status = utf8_convert_bytecode(data, text);
+ if (data->mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_convert_bytesequence(data, text);
}
else {
status = utf8_detect_codepoint(data, text, &mode_codepoint);
utf8_process_text_width(&text);
} // for
- if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_bytecode_d)) {
+ if (F_status_is_error_not(status) && !(data->mode & utf8_mode_from_bytesequence_d)) {
if (mode_codepoint != utf8_codepoint_mode_ready_e && mode_codepoint != utf8_codepoint_mode_end_e && mode_codepoint != utf8_codepoint_mode_bad_end_e && mode_codepoint != utf8_codepoint_mode_raw_end_e) {
if (mode_codepoint == utf8_codepoint_mode_number_e) {
mode_codepoint = utf8_codepoint_mode_end_e;
* F_false on success and contains invalid sequences.
* F_interrupt on (exit) signal received.
*
- * Errors (with error bit) from: utf8_convert_bytecode()
+ * Errors (with error bit) from: utf8_convert_bytesequence()
* Errors (with error bit) from: utf8_convert_codepoint()
* Errors (with error bit) from: utf8_detect_codepoint()
*
- * @see utf8_convert_bytecode()
+ * @see utf8_convert_bytesequence()
* @see utf8_convert_codepoint()
* @see utf8_detect_codepoint()
* @see utf8_signal_received()
#include "private-common.h"
#include "private-print.h"
#include "private-utf8.h"
-#include "private-utf8_bytecode.h"
+#include "private-utf8_bytesequence.h"
#include "private-utf8_codepoint.h"
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef _di_utf8_convert_bytecode_
- f_status_t utf8_convert_bytecode(utf8_data_t * const data, const f_string_static_t character) {
+#ifndef _di_utf8_convert_bytesequence_
+ f_status_t utf8_convert_bytesequence(utf8_data_t * const data, const f_string_static_t character) {
f_status_t status = F_none;
bool valid_not = F_false;
}
}
else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
- if (data->mode & utf8_mode_to_bytecode_d) {
- utf8_print_bytecode(data, character);
+ if (data->mode & utf8_mode_to_bytesequence_d) {
+ utf8_print_bytesequence(data, character);
}
else if (data->mode & utf8_mode_to_codepoint_d) {
utf8_print_codepoint(data, codepoint);
return F_none;
}
-#endif // _di_utf8_convert_bytecode_
+#endif // _di_utf8_convert_bytesequence_
-#ifndef _di_utf8_process_file_bytecode_
- f_status_t utf8_process_file_bytecode(utf8_data_t * const data, const f_file_t file) {
+#ifndef _di_utf8_process_file_bytesequence_
+ f_status_t utf8_process_file_bytesequence(utf8_data_t * const data, const f_file_t file) {
f_status_t status = F_none;
} // for
if (j == character.used) {
- if (data->mode & utf8_mode_from_bytecode_d) {
- status = utf8_convert_bytecode(data, character);
+ if (data->mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_convert_bytesequence(data, character);
}
else {
status = utf8_detect_codepoint(data, character, &mode_codepoint);
if (F_status_is_error_not(status) && status != F_interrupt && next == F_false) {
character.used = j;
- if (data->mode & utf8_mode_from_bytecode_d) {
- status = utf8_convert_bytecode(data, character);
+ if (data->mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_convert_bytesequence(data, character);
}
else {
status = utf8_detect_codepoint(data, character, &mode_codepoint);
return valid;
}
-#endif // _di_utf8_process_file_bytecode_
+#endif // _di_utf8_process_file_bytesequence_
#ifdef __cplusplus
} // extern "C"
* API Version: 0.5
* Licenses: lgpl-2.1-or-later
*/
-#ifndef _PRIVATE_utf8_bytecode_h
-#define _PRIVATE_utf8_bytecode_h
+#ifndef _PRIVATE_utf8_bytesequence_h
+#define _PRIVATE_utf8_bytesequence_h
#ifdef __cplusplus
extern "C" {
#endif
/**
- * Convert a bytecode character to another format.
+ * Convert a bytesequence character to another format.
*
* This automatically determines the output format and also handles the verify process.
*
*
* Errors (with error bit) from: f_utf_unicode_to()
*/
-#ifndef _di_utf8_convert_bytecode_
- extern f_status_t utf8_convert_bytecode(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d;
-#endif // _di_utf8_convert_bytecode_
+#ifndef _di_utf8_convert_bytesequence_
+ extern f_status_t utf8_convert_bytesequence(utf8_data_t * const data, const f_string_static_t character) F_attribute_visibility_internal_d;
+#endif // _di_utf8_convert_bytesequence_
/**
- * Process file as a bytecode input, handling conversion or verification as appropriate.
+ * Process file as a bytesequence input, handling conversion or verification as appropriate.
*
* @param data
* The program data.
* F_false on success and contains invalid sequences.
* F_interrupt on (exit) signal received.
*
- * Errors (with error bit) from: utf8_convert_bytecode()
+ * Errors (with error bit) from: utf8_convert_bytesequence()
* Errors (with error bit) from: utf8_detect_codepoint()
*
* @see fll_program_standard_signal_received()
- * @see utf8_convert_bytecode()
+ * @see utf8_convert_bytesequence()
* @see utf8_detect_codepoint()
*/
-#ifndef _di_utf8_process_file_bytecode_
- extern f_status_t utf8_process_file_bytecode(utf8_data_t * const data, const f_file_t file) F_attribute_visibility_internal_d;
-#endif // _di_utf8_process_file_bytecode_
+#ifndef _di_utf8_process_file_bytesequence_
+ extern f_status_t utf8_process_file_bytesequence(utf8_data_t * const data, const f_file_t file) F_attribute_visibility_internal_d;
+#endif // _di_utf8_process_file_bytesequence_
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // _PRIVATE_utf8_bytecode_h
+#endif // _PRIVATE_utf8_bytesequence_h
#include "private-common.h"
#include "private-print.h"
#include "private-utf8.h"
-#include "private-utf8_bytecode.h"
+#include "private-utf8_bytesequence.h"
#include "private-utf8_codepoint.h"
#ifdef __cplusplus
}
}
else if (data->main->parameters.array[utf8_parameter_verify_e].result == f_console_result_none_e) {
- if (data->mode & utf8_mode_to_bytecode_d) {
+ if (data->mode & utf8_mode_to_bytesequence_d) {
f_char_t byte[4] = { 0, 0, 0, 0 };
f_string_static_t character = macro_f_string_static_t_initialize(byte, 0, 4);
status = F_none;
character.used = macro_f_utf_byte_width(character.string[0]);
- utf8_print_bytecode(data, character);
+ utf8_print_bytesequence(data, character);
}
}
else if (data->mode & utf8_mode_to_codepoint_d) {
++width;
}
- if (data->mode & utf8_mode_to_bytecode_d) {
- utf8_print_raw_bytecode(data, raw, width);
+ if (data->mode & utf8_mode_to_bytesequence_d) {
+ utf8_print_raw_bytesequence(data, raw, width);
}
else if (data->mode & utf8_mode_to_codepoint_d) {
utf8_print_raw_codepoint(data, data->text);
} // for
if (j == character.used) {
- if (data->mode & utf8_mode_from_bytecode_d) {
- status = utf8_convert_bytecode(data, character);
+ if (data->mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_convert_bytesequence(data, character);
}
else {
status = utf8_detect_codepoint(data, character, &mode_codepoint);
if (F_status_is_error_not(status) && status != F_interrupt && next == F_false) {
character.used = j;
- if (data->mode & utf8_mode_from_bytecode_d) {
- status = utf8_convert_bytecode(data, character);
+ if (data->mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_convert_bytesequence(data, character);
}
else {
status = utf8_detect_codepoint(data, character, &mode_codepoint);
* F_false on success and contains invalid sequences.
* F_interrupt on (exit) signal received.
*
- * Errors (with error bit) from: utf8_convert_bytecode()
+ * Errors (with error bit) from: utf8_convert_bytesequence()
* Errors (with error bit) from: utf8_detect_codepoint()
*
* @see fll_program_standard_signal_received()
- * @see utf8_convert_bytecode()
+ * @see utf8_convert_bytesequence()
* @see utf8_detect_codepoint()
*/
#ifndef _di_utf8_process_file_codepoint_
#include "private-common.h"
#include "private-print.h"
#include "private-utf8.h"
-#include "private-utf8_bytecode.h"
+#include "private-utf8_bytesequence.h"
#include "private-utf8_codepoint.h"
#ifdef __cplusplus
f_print_dynamic_raw(f_string_eol_s, file.stream);
- fll_program_print_help_option(file, context, utf8_short_from_bytecode_s, utf8_long_from_bytecode_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The expected input format is byte code (character data).");
- fll_program_print_help_option(file, context, utf8_short_from_codepoint_s, utf8_long_from_codepoint_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "The expected input format is codepoint (such as U+0000).");
- fll_program_print_help_option(file, context, utf8_short_from_file_s, utf8_long_from_file_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Use the given file as the input source.");
+ fll_program_print_help_option(file, context, utf8_short_from_bytesequence_s, utf8_long_from_bytesequence_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "The expected input format is byte sequence (character data).");
+ fll_program_print_help_option(file, context, utf8_short_from_codepoint_s, utf8_long_from_codepoint_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The expected input format is codepoint (such as U+0000).");
+ fll_program_print_help_option(file, context, utf8_short_from_file_s, utf8_long_from_file_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Use the given file as the input source.");
f_print_dynamic_raw(f_string_eol_s, file.stream);
- fll_program_print_help_option(file, context, utf8_short_to_bytecode_s, utf8_long_to_bytecode_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The output format is byte code (character data).");
- fll_program_print_help_option(file, context, utf8_short_to_codepoint_s, utf8_long_to_codepoint_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "The output format is codepoint (such as U+0000).");
- fll_program_print_help_option(file, context, utf8_short_to_combining_s, utf8_long_to_combining_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "The output format is to print whether or not character is combining or not.");
- fll_program_print_help_option(file, context, utf8_short_to_file_s, utf8_long_to_file_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Use the given file as the output destination.");
- fll_program_print_help_option(file, context, utf8_short_to_width_s, utf8_long_to_width_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The output format is to print the width of a character (either 0, 1, or 2).");
+ fll_program_print_help_option(file, context, utf8_short_to_bytesequence_s, utf8_long_to_bytesequence_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, "The output format is byte sequence (character data).");
+ fll_program_print_help_option(file, context, utf8_short_to_codepoint_s, utf8_long_to_codepoint_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The output format is codepoint (such as U+0000).");
+ fll_program_print_help_option(file, context, utf8_short_to_combining_s, utf8_long_to_combining_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The output format is to print whether or not character is combining or not.");
+ fll_program_print_help_option(file, context, utf8_short_to_file_s, utf8_long_to_file_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " Use the given file as the output destination.");
+ fll_program_print_help_option(file, context, utf8_short_to_width_s, utf8_long_to_width_s, f_console_symbol_short_enable_s, f_console_symbol_long_enable_s, " The output format is to print the width of a character (either 0, 1, or 2).");
f_print_dynamic_raw(f_string_eol_s, file.stream);
fll_program_print_help_usage(file, context, utf8_program_name_s, utf8_program_help_parameters_s);
- fl_print_format(" The default behavior is to assume the expected input is byte code from the command line to be output to the screen as codepoints.%r%r", file.stream, f_string_eol_s, f_string_eol_s);
+ fl_print_format(" The default behavior is to assume the expected input is byte sequence from the command line to be output to the screen as codepoints.%r%r", file.stream, f_string_eol_s, f_string_eol_s);
fl_print_format(" Multiple input sources are allowed but only a single output destination is allowed.%r%r", file.stream, f_string_eol_s, f_string_eol_s);
// Identify and prioritize from mode parameters.
{
- f_console_parameter_id_t ids[2] = { utf8_parameter_from_bytecode_e, utf8_parameter_from_codepoint_e };
+ f_console_parameter_id_t ids[2] = { utf8_parameter_from_bytesequence_e, utf8_parameter_from_codepoint_e };
f_console_parameter_id_t choice = 0;
const f_console_parameter_ids_t choices = macro_f_console_parameter_ids_t_initialize(ids, 2);
return status;
}
- if (choice == utf8_parameter_from_bytecode_e) {
+ if (choice == utf8_parameter_from_bytesequence_e) {
if (data.mode & utf8_mode_from_codepoint_d) {
data.mode -= utf8_mode_from_codepoint_d;
}
- data.mode |= utf8_mode_from_bytecode_d;
+ data.mode |= utf8_mode_from_bytesequence_d;
}
else if (choice == utf8_parameter_from_codepoint_e) {
- if (data.mode & utf8_mode_from_bytecode_d) {
- data.mode -= utf8_mode_from_bytecode_d;
+ if (data.mode & utf8_mode_from_bytesequence_d) {
+ data.mode -= utf8_mode_from_bytesequence_d;
}
data.mode |= utf8_mode_from_codepoint_d;
// Identify and prioritize to mode parameters.
{
- f_console_parameter_id_t ids[4] = { utf8_parameter_to_bytecode_e, utf8_parameter_to_codepoint_e, utf8_parameter_to_combining_e, utf8_parameter_to_width_e };
+ f_console_parameter_id_t ids[4] = { utf8_parameter_to_bytesequence_e, utf8_parameter_to_codepoint_e, utf8_parameter_to_combining_e, utf8_parameter_to_width_e };
f_console_parameter_id_t choice = 0;
const f_console_parameter_ids_t choices = macro_f_console_parameter_ids_t_initialize(ids, 4);
return status;
}
- if (choice == utf8_parameter_to_bytecode_e) {
+ if (choice == utf8_parameter_to_bytesequence_e) {
if (data.mode & utf8_mode_to_codepoint_d) {
data.mode -= utf8_mode_to_codepoint_d;
}
data.mode -= utf8_mode_to_width_d;
}
- data.mode |= utf8_mode_to_bytecode_d;
+ data.mode |= utf8_mode_to_bytesequence_d;
}
else if (choice == utf8_parameter_to_codepoint_e) {
- if (data.mode & utf8_mode_to_bytecode_d) {
- data.mode -= utf8_mode_to_bytecode_d;
+ if (data.mode & utf8_mode_to_bytesequence_d) {
+ data.mode -= utf8_mode_to_bytesequence_d;
}
if (data.mode & utf8_mode_to_combining_d) {
data.mode |= utf8_mode_to_codepoint_d;
}
else if (choice == utf8_parameter_to_combining_e) {
- if (data.mode & utf8_mode_to_bytecode_d) {
- data.mode -= utf8_mode_to_bytecode_d;
+ if (data.mode & utf8_mode_to_bytesequence_d) {
+ data.mode -= utf8_mode_to_bytesequence_d;
}
if (data.mode & utf8_mode_to_codepoint_d) {
data.mode |= utf8_mode_to_combining_d;
}
else if (choice == utf8_parameter_to_width_e) {
- if (data.mode & utf8_mode_to_bytecode_d) {
- data.mode -= utf8_mode_to_bytecode_d;
+ if (data.mode & utf8_mode_to_bytesequence_d) {
+ data.mode -= utf8_mode_to_bytesequence_d;
}
if (data.mode & utf8_mode_to_codepoint_d) {
status = F_status_set_error(F_parameter);
}
- if (!(data.mode & utf8_mode_to_bytecode_d)) {
+ if (!(data.mode & utf8_mode_to_bytesequence_d)) {
if (main->parameters.array[utf8_parameter_separate_e].result == f_console_result_found_e || main->parameters.array[utf8_parameter_headers_e].result == f_console_result_found_e) {
data.prepend = utf8_string_prepend_padding_s;
data.append = f_string_eol_s;
utf8_print_section_header_pipe(&data);
- if (data.mode & utf8_mode_from_bytecode_d) {
- status = utf8_process_file_bytecode(&data, file);
+ if (data.mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_process_file_bytesequence(&data, file);
}
else {
status = utf8_process_file_codepoint(&data, file);
}
if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) {
- fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, f_string_empty_s, f_file_operation_process_s, fll_error_file_type_pipe_e);
+ fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytesequence_d ? "utf8_process_file_bytesequence" : "utf8_process_file_codepoint", F_true, f_string_empty_s, f_file_operation_process_s, fll_error_file_type_pipe_e);
}
}
break;
}
- if (data.mode & utf8_mode_from_bytecode_d) {
- status = utf8_process_file_bytecode(&data, file);
+ if (data.mode & utf8_mode_from_bytesequence_d) {
+ status = utf8_process_file_bytesequence(&data, file);
}
else {
status = utf8_process_file_codepoint(&data, file);
}
if (F_status_is_error(status) && F_status_set_fine(status) != F_utf_fragment && F_status_set_fine(status) != F_complete_not_utf) {
- fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytecode_d ? "utf8_process_file_bytecode" : "utf8_process_file_codepoint", F_true, data.argv[index], f_file_operation_process_s, fll_error_file_type_file_e);
+ fll_error_file_print(main->error, F_status_set_fine(status), data.mode & utf8_mode_from_bytesequence_d ? "utf8_process_file_bytesequence" : "utf8_process_file_codepoint", F_true, data.argv[index], f_file_operation_process_s, fll_error_file_type_file_e);
break;
}
* This is intended to support Unicode 14.0.
*
* This is a program for handling basic UTF-8 related conversions.
- * - Convert from UTF-8 character to bytecode.
- * - Convert from Unicode Codepoint (such as U+0000) to bytecode.
- * - Convert from UTF-8 bytecode to character.
- * - Convert from UTF-8 bytecode to Unicode Codepoint (such as U+0000).
+ * - Convert from UTF-8 character to bytesequence.
+ * - Convert from Unicode Codepoint (such as U+0000) to bytesequence.
+ * - Convert from UTF-8 bytesequence to character.
+ * - Convert from UTF-8 bytesequence to Unicode Codepoint (such as U+0000).
*/
#ifndef _utf8_h
#define _utf8_h
build_libraries-level -lfll_2 -lfll_1 -lfll_0
build_libraries-monolithic -lfll
-build_sources_library utf8.c common.c private-common.c private-print.c private-utf8.c private-utf8_bytecode.c private-utf8_codepoint.c
+build_sources_library utf8.c common.c private-common.c private-print.c private-utf8.c private-utf8_bytesequence.c private-utf8_codepoint.c
build_sources_program main.c