From d161162f054db2826722523fcd91fb90352c5bbb Mon Sep 17 00:00:00 2001
From: Kevin Day
Date: Mon, 4 May 2020 06:47:38 -0500
Subject: [PATCH] Feature: add fl_utf_file project

This is intended to be an f_utf_character handling version of fl_file project.
This f_utf_character is a 4-byte wide storage for a single UTF-8 character.

I accidentally committed the utf_file build settings in a previous commit: f69e57a.
---
 level_1/fl_utf_file/c/utf_file.c            | 260 ++++++++++++++++++++++++++
 level_1/fl_utf_file/c/utf_file.h            | 124 ++++++++++++
 level_1/fl_utf_file/data/build/dependencies |   6 +
 level_1/fl_utf_file/data/build/settings     |  30 +++
 4 files changed, 420 insertions(+)
 create mode 100644 level_1/fl_utf_file/c/utf_file.c
 create mode 100644 level_1/fl_utf_file/c/utf_file.h
 create mode 100644 level_1/fl_utf_file/data/build/dependencies
 create mode 100644 level_1/fl_utf_file/data/build/settings

diff --git a/level_1/fl_utf_file/c/utf_file.c b/level_1/fl_utf_file/c/utf_file.c
new file mode 100644
index 0000000..7118745
--- /dev/null
+++ b/level_1/fl_utf_file/c/utf_file.c
@@ -0,0 +1,260 @@
+#include <level_1/utf_file.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(_di_fl_utf_file_read_) || !defined(_di_fl_utf_file_read_position_)
+  /**
+   * Private helper: read bytes from the file, decode them, and append the characters to the buffer.
+   *
+   * A character that is split across two reads is completed by the following read.
+   * This does not validate the UTF-8 codes.
+   *
+   * @param file
+   *   The file to read from, which must already be open.
+   * @param buffer
+   *   The buffer to append to.
+   * @param bytes_total
+   *   The number of bytes to request per read, which must not be 0.
+   * @param repeat
+   *   When f_true, keep reading until EOF is reached, otherwise read only once.
+   *
+   * @return
+   *   f_none on success.
+   *   f_none_on_eof on success and EOF was reached.
+   *   f_file_error_read (with error bit) if file read failed.
+   *   f_string_too_large (with error bit) if string is too large to fit into buffer.
+   *   f_incomplete_utf_on_eof (with error bit) if the last character read is incomplete.
+   *   Errors from the buffer resize are returned as is.
+   */
+  static f_return_status private_fl_utf_file_read_blocks(f_file *file, f_utf_string_dynamic *buffer, const f_number_unsigned bytes_total, const bool repeat) {
+    f_status status = f_none;
+
+    f_string_length i = 0;
+    f_string_length total = 0;
+    f_string_length carry = 0;
+
+    uint8_t width = 0;
+    f_utf_character character = 0;
+
+    // at most 3 bytes of an incomplete character carry over from the previous read.
+    uint8_t buffer_read[bytes_total + 3];
+
+    do {
+      // every byte may be its own character, so reserve one character per byte.
+      if (buffer->used + bytes_total + carry > buffer->size) {
+        if (buffer->used + bytes_total + carry > f_utf_string_max_size) return f_status_set_error(f_string_too_large);
+
+        // NOTE(review): this is the non-UTF resize macro, confirm it sizes the elements for f_utf_character.
+        f_macro_string_dynamic_resize(status, (*buffer), buffer->used + bytes_total + carry);
+
+        if (f_status_is_error(status)) return status;
+      }
+
+      total = carry + fread(buffer_read + carry, 1, bytes_total, file->address);
+
+      if (ferror(file->address) != 0) return f_status_set_error(f_file_error_read);
+
+      for (i = 0; i < total; i += width) {
+        width = f_macro_utf_byte_width(buffer_read[i]);
+
+        // always advance, even if a width of 0 is ever reported.
+        if (width == 0) width = 1;
+
+        // the rest of this character has not been read yet.
+        if (i + width > total) break;
+
+        character = f_macro_utf_character_from_char_1(buffer_read[i]);
+
+        if (width > 1) {
+          character |= f_macro_utf_character_from_char_2(buffer_read[i + 1]);
+
+          if (width > 2) {
+            character |= f_macro_utf_character_from_char_3(buffer_read[i + 2]);
+
+            if (width > 3) {
+              character |= f_macro_utf_character_from_char_4(buffer_read[i + 3]);
+            }
+          }
+        }
+
+        buffer->string[buffer->used] = character;
+        buffer->used++;
+      } // for
+
+      carry = total - i;
+
+      if (feof(file->address)) {
+        if (carry > 0) return f_status_set_error(f_incomplete_utf_on_eof);
+
+        return f_none_on_eof;
+      }
+
+      // keep the incomplete character at the front for the next read.
+      if (carry > 0) memmove(buffer_read, buffer_read + i, carry);
+    } while (repeat);
+
+    // a single read that stops inside of a character cannot be completed.
+    if (carry > 0) return f_status_set_error(f_incomplete_utf_on_eof);
+
+    return f_none;
+  }
+#endif // !defined(_di_fl_utf_file_read_) || !defined(_di_fl_utf_file_read_position_)
+
+#ifndef _di_fl_utf_file_read_
+  /**
+   * Load the entire file into the UTF-8 buffer (which may start out empty), reading size_block * size_chunk bytes at a time.
+   */
+  f_return_status fl_utf_file_read(f_file *file, f_utf_string_dynamic *buffer) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer->used > buffer->size) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    const f_number_unsigned bytes_total = file->size_block * file->size_chunk;
+
+    // a read of 0 bytes would never reach EOF.
+    if (bytes_total == 0) return f_status_set_error(f_invalid_parameter);
+
+    return private_fl_utf_file_read_blocks(file, buffer, bytes_total, f_true);
+  }
+#endif // _di_fl_utf_file_read_
+
+#ifndef _di_fl_utf_file_read_position_
+  /**
+   * Seek to position.start and then load position.total chunks, or everything up to EOF when position.total is 0.
+   */
+  f_return_status fl_utf_file_read_position(f_file *file, f_utf_string_dynamic *buffer, const f_file_position position) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer == 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    // when total is 0, this means the file will be read until EOF is reached.
+    const bool infinite = position.total == 0;
+    const f_number_unsigned bytes_total = (infinite ? file->size_block : position.total) * file->size_chunk;
+
+    if (bytes_total == 0) return f_status_set_error(f_invalid_parameter);
+
+    // first seek to 'where' we need to begin the read.
+    {
+      int result = 0;
+      const long current_file_position = ftell(file->address);
+
+      if (current_file_position == -1) return f_status_set_error(f_file_error_seek);
+
+      // NOTE(review): ftell() reports bytes but the difference is scaled by size_chunk, confirm the units of position.start.
+      if (current_file_position > position.start) {
+        result = f_macro_file_seek_to(file->address, file->size_chunk * (0 - (current_file_position - position.start)));
+      }
+      else if (current_file_position < position.start) {
+        result = f_macro_file_seek_to(file->address, file->size_chunk * (position.start - current_file_position));
+      }
+
+      if (result != 0) return f_status_set_error(f_file_error_seek);
+    }
+
+    return private_fl_utf_file_read_blocks(file, buffer, bytes_total, infinite);
+  }
+#endif // _di_fl_utf_file_read_position_
+
+#if !defined(_di_fl_utf_file_write_) || !defined(_di_fl_utf_file_write_position_)
+  /**
+   * Private helper: encode a range of characters from the buffer as bytes and write them to the file.
+   *
+   * @param file
+   *   The file to write to, which must already be open.
+   * @param buffer
+   *   The buffer holding the characters.
+   * @param start
+   *   The index of the first character to write.
+   * @param stop
+   *   The index after the last character to write.
+   *
+   * @return
+   *   f_none on success.
+   *   f_file_error_write (with error bit) if write failed.
+   */
+  static f_return_status private_fl_utf_file_write_range(f_file *file, const f_utf_string_dynamic buffer, const f_string_length start, const f_string_length stop) {
+    f_string_length i = start;
+    f_string_length used = 0;
+
+    uint8_t buffer_write[f_file_default_write_size];
+    uint8_t width = 0;
+
+    while (i < stop) {
+      // fill the write buffer with as many whole characters as will fit.
+      for (used = 0; i < stop; i++, used += width) {
+        width = f_macro_utf_character_width(buffer.string[i]);
+
+        // always advance, even if a width of 0 is ever reported.
+        if (width == 0) width = 1;
+
+        if (used + width > f_file_default_write_size) break;
+
+        buffer_write[used] = f_macro_utf_character_to_char_1(buffer.string[i]);
+
+        if (width > 1) {
+          buffer_write[used + 1] = f_macro_utf_character_to_char_2(buffer.string[i]);
+
+          if (width > 2) {
+            buffer_write[used + 2] = f_macro_utf_character_to_char_3(buffer.string[i]);
+
+            if (width > 3) {
+              buffer_write[used + 3] = f_macro_utf_character_to_char_4(buffer.string[i]);
+            }
+          }
+        }
+      } // for
+
+      // not even one character fits into the write buffer, so no progress is possible.
+      if (used == 0) return f_status_set_error(f_file_error_write);
+
+      if (fwrite(buffer_write, 1, used, file->address) < used) return f_status_set_error(f_file_error_write);
+    } // while
+
+    return f_none;
+  }
+#endif // !defined(_di_fl_utf_file_write_) || !defined(_di_fl_utf_file_write_position_)
+
+#ifndef _di_fl_utf_file_write_
+  /**
+   * Save every character of the UTF-8 buffer into the file.
+   */
+  f_return_status fl_utf_file_write(f_file *file, const f_utf_string_dynamic buffer) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    return private_fl_utf_file_write_range(file, buffer, 0, buffer.used);
+  }
+#endif // _di_fl_utf_file_write_
+
+#ifndef _di_fl_utf_file_write_position_
+  /**
+   * Save the characters from position.start through position.stop (inclusive) of the UTF-8 buffer into the file.
+   */
+  f_return_status fl_utf_file_write_position(f_file *file, const f_utf_string_dynamic buffer, const f_utf_string_location position) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (position.start > position.stop) return f_status_set_error(f_invalid_parameter);
+      if (position.stop >= buffer.used) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    return private_fl_utf_file_write_range(file, buffer, position.start, position.stop + 1);
+  }
+#endif // _di_fl_utf_file_write_position_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_1/fl_utf_file/c/utf_file.h b/level_1/fl_utf_file/c/utf_file.h
new file mode 100644
index
0000000..ca97550
--- /dev/null
+++ b/level_1/fl_utf_file/c/utf_file.h
@@ -0,0 +1,124 @@
+/**
+ * FLL - Level 1
+ *
+ * Project: Utf File
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * File Operations.
+ */
+#ifndef _FL_utf_file_h
+#define _FL_utf_file_h
+
+// libc includes (NOTE(review): every #include target in this file is missing from this copy of the patch, restore them from the repository).
+#include
+
+// fll-0 includes
+#include
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Load entire file into the UTF-8 buffer.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to read from.
+ * @param buffer
+ *   The buffer to load the file into.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eof on success and EOF was reached.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_seek (with error bit) if file seek failed.
+ *   f_file_error_read (with error bit) if file read failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_error_reallocation (with error bit) on memory reallocation error.
+ *   f_string_too_large (with error bit) if string is too large to fit into buffer.
+ *   f_incomplete_utf_on_eof (with error bit) if UTF-8 character was incomplete at the end of the file.
+ */
+#ifndef _di_fl_utf_file_read_
+  extern f_return_status fl_utf_file_read(f_file *file, f_utf_string_dynamic *buffer);
+#endif // _di_fl_utf_file_read_
+
+/**
+ * Load file into the UTF-8 buffer, based on specified positions.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to read from.
+ * @param buffer
+ *   The buffer to load the file into.
+ * @param position
+ *   The file position to base reading off of (a total of 0 means read until EOF is reached).
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eof on success and EOF was reached.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_seek (with error bit) if file seek failed.
+ *   f_file_error_read (with error bit) if file read failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_error_reallocation (with error bit) on memory reallocation error.
+ *   f_string_too_large (with error bit) if string is too large to fit into buffer.
+ *   f_incomplete_utf_on_eof (with error bit) if UTF-8 character was incomplete at the end of the file.
+ */
+#ifndef _di_fl_utf_file_read_position_
+  extern f_return_status fl_utf_file_read_position(f_file *file, f_utf_string_dynamic *buffer, const f_file_position position);
+#endif // _di_fl_utf_file_read_position_
+
+/**
+ * Save entire UTF-8 buffer into file.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to save to.
+ * @param buffer
+ *   The buffer to save to the file.
+ *
+ * @return
+ *   f_none on success.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_write (with error bit) if write failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_fl_utf_file_write_
+  extern f_return_status fl_utf_file_write(f_file *file, const f_utf_string_dynamic buffer);
+#endif // _di_fl_utf_file_write_
+
+/**
+ * Save the UTF-8 buffer into file, based on specified positions.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to save to.
+ * @param buffer
+ *   The buffer to save to the file.
+ * @param position
+ *   The location (start and stop) within the buffer to base writing off of.
+ *
+ * @return
+ *   f_none on success.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_write (with error bit) if write failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ */
+#ifndef _di_fl_utf_file_write_position_
+  extern f_return_status fl_utf_file_write_position(f_file *file, const f_utf_string_dynamic buffer, const f_utf_string_location position);
+#endif // _di_fl_utf_file_write_position_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _FL_utf_file_h
diff --git a/level_1/fl_utf_file/data/build/dependencies b/level_1/fl_utf_file/data/build/dependencies
new file mode 100644
index 0000000..9d424c7
--- /dev/null
+++ b/level_1/fl_utf_file/data/build/dependencies
@@ -0,0 +1,6 @@
+f_type
+f_status
+f_memory
+f_string
+f_file
+f_utf
diff --git a/level_1/fl_utf_file/data/build/settings b/level_1/fl_utf_file/data/build/settings
new file mode 100644
index 0000000..e0cb76d
--- /dev/null
+++ b/level_1/fl_utf_file/data/build/settings
@@ -0,0 +1,30 @@
+# fss-0000
+
+project_name fl_utf_file
+project_level 1
+
+version_major 0
+version_minor 5
+version_micro 0
+
+build_compiler gcc
+build_linker ar
+build_libraries -lc
+build_libraries_fll -lf_file -lf_utf -lf_memory
+build_sources_library utf_file.c
+build_sources_program
+build_sources_headers utf_file.h
+build_sources_bash
+build_sources_settings
+build_shared yes
+build_static yes
+
+defines_all
+defines_static
+defines_shared
+
+flags_all -z now -g
+flags_shared
+flags_static
+flags_library -fPIC
+flags_program -fPIE
-- 
1.8.3.1