From e7937372ce3520b43b8f0d52b7516cd588ef42f6 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Sun, 22 May 2022 20:47:53 -0500 Subject: [PATCH] Update: Tweak endianness for f_utf_char_t. I continue to forget that the f_utf_char_t is a big-endian format regardless of the host endianness. I then end up comparing the endianness logic to normal operations and find discrepancies. I waste a good bit of time to ultimately realize that the f_utf_char_t is not in host byte order. Update the comments to better represent this situation. I also noticed that the big endian bitwise operations are going in the wrong direction. I could be wrong, but I think I need to do a left shift rather than a right shift. Or perhaps, this only needs to be done on a big-endian system? I need to test this logic on a big endian system. --- level_0/f_utf/c/utf/common.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/level_0/f_utf/c/utf/common.h b/level_0/f_utf/c/utf/common.h index edc204b..ab11825 100644 --- a/level_0/f_utf/c/utf/common.h +++ b/level_0/f_utf/c/utf/common.h @@ -158,6 +158,9 @@ extern "C" { * The byte structure is intended to be read left to right in memory regardless of system endianness. * This is done so that the first character (the left most) can be read naturally as a string, such as string[0] = first character. * + * On little-endian systems, the hex-string 0xff is represented internally as 0x000000ff. + * This needs to be converted into the internal representation of 0xff000000 to be properly represented as a "f_utf_char_t". + * * The macro_f_utf_char_t_mask_byte_* are used to get the entire character set fo a given width. * * The macro_f_utf_char_t_mask_char_* are used to get a specific UTF-8 block as a single character range. 
@@ -182,22 +185,22 @@ extern "C" { #define macro_f_utf_char_t_initialize(code) code #ifdef _is_F_endian_big - #define F_utf_char_mask_byte_1_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - #define F_utf_char_mask_byte_2_d 0x0000ffff // 0000 0000, 0000 0000, 1111 1111, 1111 1111 - #define F_utf_char_mask_byte_3_d 0x00ffffff // 0000 0000, 1111 1111, 1111 1111, 1111 1111 + #define F_utf_char_mask_byte_1_d 0x000000ff // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_mask_byte_2_d 0x0000ffff // 1111 1111, 1111 1111, 0000 0000, 0000 0000 + #define F_utf_char_mask_byte_3_d 0x00ffffff // 1111 1111, 1111 1111, 1111 1111, 0000 0000 #define F_utf_char_mask_byte_4_d 0xffffffff // 1111 1111, 1111 1111, 1111 1111, 1111 1111 - #define F_utf_char_mask_char_1_d 0x000000ff // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - #define F_utf_char_mask_char_2_d 0x0000ff00 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 - #define F_utf_char_mask_char_3_d 0x00ff0000 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 - #define F_utf_char_mask_char_4_d 0xff000000 // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_1_d 0x000000ff // 1111 1111, 0000 0000, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_2_d 0x0000ff00 // 0000 0000, 1111 1111, 0000 0000, 0000 0000 + #define F_utf_char_mask_char_3_d 0x00ff0000 // 0000 0000, 0000 0000, 1111 1111, 0000 0000 + #define F_utf_char_mask_char_4_d 0xff000000 // 0000 0000, 0000 0000, 0000 0000, 1111 1111 - #define macro_f_utf_char_t_to_char_1(character) (((character) & F_utf_char_mask_char_1_d)) // Grab first byte. - #define macro_f_utf_char_t_to_char_2(character) (((character) & F_utf_char_mask_char_2_d) >> 8) // Grab second byte. - #define macro_f_utf_char_t_to_char_3(character) (((character) & F_utf_char_mask_char_3_d) >> 16) // Grab third byte. - #define macro_f_utf_char_t_to_char_4(character) (((character) & F_utf_char_mask_char_4_d) >> 24) // Grab fourth byte. 
+ #define macro_f_utf_char_t_to_char_1(character) ((character) & F_utf_char_mask_char_1_d) // Grab first byte. + #define macro_f_utf_char_t_to_char_2(character) (((character) & F_utf_char_mask_char_2_d) << 8) // Grab second byte. + #define macro_f_utf_char_t_to_char_3(character) (((character) & F_utf_char_mask_char_3_d) << 16) // Grab third byte. + #define macro_f_utf_char_t_to_char_4(character) (((character) & F_utf_char_mask_char_4_d) << 24) // Grab fourth byte. - #define macro_f_utf_char_t_from_char_1(character) (((character)) & F_utf_char_mask_char_1_d) // Shift to first byte. + #define macro_f_utf_char_t_from_char_1(character) ((character) & F_utf_char_mask_char_1_d) // Shift to first byte. #define macro_f_utf_char_t_from_char_2(character) (((character) << 8) & F_utf_char_mask_char_2_d) // Shift to second byte. #define macro_f_utf_char_t_from_char_3(character) (((character) << 16) & F_utf_char_mask_char_3_d) // Shift to third byte. #define macro_f_utf_char_t_from_char_4(character) (((character) << 24) & F_utf_char_mask_char_4_d) // Shift to fourth byte. -- 1.8.3.1