From c19060537c4136665b665c96a932291d53bfbda1 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Sun, 6 Feb 2022 21:03:52 -0600 Subject: [PATCH] Bugfix: File stream read inefficiency, allocation f_string_t instead of char, and actually use state.step_small. The file stream reader requires the buffer to be pre-allocated. Prevent the resize from resizing an extra time if the resulting size read is smaller than the requested size. The caller can then optimize this by setting the read size to 1 digit larger than the actual file size. Also switch to fread_unlocked() and handle the locks manually. The strings are being allocated as f_string_t. The f_string_t type definition is actually a "char *". This is the size of a memory address (and could be as large as 64-bit type on 64-bit architectures). This is a huge mistake because this should only be using size of char, which is 1. I provided a state.step_large and state.step_small to the FSS functions as a quick solution for more control over memory management. It turns out this is not being used and for very large files this can be very wasteful. In the long term, I believe a better fix is needed where the files are pre-processed to determine the objects and contents. Then, the structures can be allocated with a known size. The reason for this is that it seems that memory resizes are significantly more expensive than processing an arbitrarily large string. Increasing the cost of processing that string from one time to two times is likely worth the cost to save time and resources lost due to memory re-allocations. --- level_0/f_file/c/file.c | 40 ++++++++++++++++++++++++--------- level_0/f_file/c/file.h | 8 +++++-- level_0/f_string/c/private-string.c | 4 ++-- level_0/f_string/c/string-common.h | 8 +++---- level_2/fll_execute/c/private-execute.c | 2 +- level_2/fll_fss/c/fss.c | 2 +- level_2/fll_fss/c/fss_basic.c | 22 +++++++++--------- level_2/fll_fss/c/fss_basic_list.c | 14 +++++------- level_2/fll_fss/c/fss_embedded_list.c | 14 ++++++------ level_2/fll_fss/c/fss_extended.c | 30 ++++++++++++------------- level_2/fll_fss/c/fss_extended_list.c | 14 +++++------- level_2/fll_fss/c/fss_payload.c | 18 +++++++-------- 12 files changed, 95 insertions(+), 81 deletions(-) diff --git a/level_0/f_file/c/file.c b/level_0/f_file/c/file.c index c696416..50ba7fb 100644 --- a/level_0/f_file/c/file.c +++ b/level_0/f_file/c/file.c @@ -2065,14 +2065,22 @@ extern "C" { f_status_t status = F_none; ssize_t size_read = 0; - for (;;) { + flockfile(file.stream); + do { status = f_string_dynamic_increase_by(file.size_read, buffer); - if (F_status_is_error(status)) return status; - size_read = fread(buffer->string + buffer->used, 1, file.size_read, file.stream); + if (F_status_is_error(status)) { + funlockfile(file.stream); + + return status; + } + + size_read = fread_unlocked(buffer->string + buffer->used, 1, file.size_read, file.stream); if (ferror(file.stream)) { + funlockfile(file.stream); + if (errno == EAGAIN || errno == EWOULDBLOCK) return F_status_set_error(F_block); if (errno == EBADF) return F_status_set_error(F_file_descriptor); if (errno == EFAULT) return F_status_set_error(F_buffer); @@ -2086,8 +2094,9 @@ extern "C" { buffer->used += size_read; - if (feof(file.stream)) break; - } // for + } while (size_read == file.size_read && !feof(file.stream)); + + funlockfile(file.stream); return F_none_eof; } @@ -2122,7 +2131,9 @@ extern "C" { return F_status_set_error(F_failure); } - buffer->used += size_read; + if (size_read) { + buffer->used += size_read; + } if (feof(file.stream)) { return F_none_eof; @@ -2140,24 +2151,29 @@ extern "C" { if (!file.stream) return F_status_set_error(F_file_closed); + { + const f_status_t status = f_string_dynamic_increase_by(total, buffer); + if (F_status_is_error(status)) return status; + } + f_array_length_t buffer_size = file.size_read; f_array_length_t buffer_count = 0; - f_status_t status = F_none; ssize_t size_read = 0; + flockfile(file.stream); + for (;;) { if (buffer_count + buffer_size > total) { buffer_size = total - buffer_count; } - status = f_string_dynamic_increase_by(buffer_size, buffer); - if (F_status_is_error(status)) return status; - size_read = fread(buffer->string + buffer->used, 1, file.size_read, file.stream); if (ferror(file.stream)) { + funlockfile(file.stream); + if (errno == EAGAIN || errno == EWOULDBLOCK) return F_status_set_error(F_block); if (errno == EBADF) return F_status_set_error(F_file_descriptor); if (errno == EFAULT) return F_status_set_error(F_buffer); @@ -2172,6 +2188,8 @@ extern "C" { buffer->used += size_read; if (feof(file.stream)) { + funlockfile(file.stream); + return F_none_eof; } @@ -2180,6 +2198,8 @@ extern "C" { if (buffer_count == total) break; } // for + funlockfile(file.stream); + return F_none_stop; } #endif // _di_f_file_stream_read_until_ diff --git a/level_0/f_file/c/file.h b/level_0/f_file/c/file.h index 1bd4540..6cba705 100644 --- a/level_0/f_file/c/file.h +++ b/level_0/f_file/c/file.h @@ -2076,7 +2076,9 @@ extern "C" { * F_interrupt (with error bit) if interrupt was received. * F_parameter (with error bit) if a parameter is invalid. * - * @see fread() + * @see flockfile() + * @see fread_unlocked() + * @see funlockfile() */ #ifndef _di_f_file_stream_read_ extern f_status_t f_file_stream_read(const f_file_t file, f_string_dynamic_t *buffer); @@ -2148,7 +2150,9 @@ extern "C" { * * Errors (with error bit) from: f_string_dynamic_increase_by(). * - * @see fread() + * @see flockfile() + * @see fread_unlocked() + * @see funlockfile() * * @see f_string_dynamic_increase_by() */ diff --git a/level_0/f_string/c/private-string.c b/level_0/f_string/c/private-string.c index 1ba22ad..066277a 100644 --- a/level_0/f_string/c/private-string.c +++ b/level_0/f_string/c/private-string.c @@ -75,7 +75,7 @@ extern "C" { #if !defined(_di_f_string_dynamic_adjust_) || !defined(_di_f_string_dynamic_decimate_by_) || !defined(_di_f_string_dynamics_adjust_) || !defined(_di_f_string_dynamics_append_) || !defined(_di_f_string_dynamics_decimate_by_) || !defined(_di_f_string_map_multis_adjust_) || !defined(_di_f_string_map_multis_append_) || !defined(_di_f_string_triples_adjust_) || !defined(_di_f_string_triples_decimate_by_) f_status_t private_f_string_dynamic_adjust(const f_array_length_t length, f_string_dynamic_t * const dynamic) { - f_status_t status = f_memory_adjust(dynamic->size, length, sizeof(f_string_t), (void **) & dynamic->string); + f_status_t status = f_memory_adjust(dynamic->size, length, sizeof(char), (void **) & dynamic->string); if (F_status_is_error_not(status)) { dynamic->size = length; @@ -107,7 +107,7 @@ extern "C" { #if !defined(_di_f_string_append_) || !defined(_di_f_string_append_assure_) || !defined(_di_f_string_append_mash_) || !defined(_di_f_string_append_nulless_) || !defined(_di_f_string_dynamic_append_) || !defined(_di_f_string_dynamic_append_assure_) || !defined(_di_f_string_dynamic_append_nulless_) || !defined(_di_f_string_dynamic_decrease_by_) || !defined(_di_f_string_dynamic_increase_) || !defined(_di_f_string_dynamic_increase_by_) || !defined(_di_f_string_dynamic_mash_) || !defined(_di_f_string_dynamic_mash_nulless_) || !defined(f_string_dynamic_partial_append) || !defined(_di_f_string_dynamic_partial_append_assure_) || !defined(_di_f_string_dynamic_partial_mash_) || !defined(_di_f_string_dynamic_prepend_) || !defined(_di_f_string_dynamic_prepend_nulless_) || !defined(_di_f_string_dynamic_terminate_) || !defined(_di_f_string_dynamic_terminate_after_) || !defined(_di_f_string_dynamics_append_) || !defined(_di_f_string_map_multis_append_) || !defined(_di_f_string_mash_nulless_) || !defined(_di_f_string_mash_) || !defined(_di_f_string_maps_append_) || !defined(_di_f_string_prepend_) || !defined(_di_f_string_prepend_nulless_) || !defined(_di_f_string_triples_append_) f_status_t private_f_string_dynamic_resize(const f_array_length_t length, f_string_dynamic_t * const dynamic) { - const f_status_t status = f_memory_resize(dynamic->size, length, sizeof(f_string_t), (void **) & dynamic->string); + const f_status_t status = f_memory_resize(dynamic->size, length, sizeof(char), (void **) & dynamic->string); if (F_status_is_error_not(status)) { dynamic->size = length; diff --git a/level_0/f_string/c/string-common.h b/level_0/f_string/c/string-common.h index ea5efcd..ba909f5 100644 --- a/level_0/f_string/c/string-common.h +++ b/level_0/f_string/c/string-common.h @@ -42,11 +42,11 @@ extern "C" { #define macro_f_string_t_clear(string) string = 0; - #define macro_f_string_t_resize(status, string, length_old, length_new) status = f_memory_resize(length_old, length_new, sizeof(f_string_t), (void **) & string); - #define macro_f_string_t_adjust(status, string, length_old, length_new) status = f_memory_adjust(length_old, length_new, sizeof(f_string_t), (void **) & string); + #define macro_f_string_t_resize(status, string, length_old, length_new) status = f_memory_resize(length_old, length_new, sizeof(char), (void **) & string); + #define macro_f_string_t_adjust(status, string, length_old, length_new) status = f_memory_adjust(length_old, length_new, sizeof(char), (void **) & string); - #define macro_f_string_t_delete_simple(string, length) f_memory_resize(length, 0, sizeof(f_string_t), (void **) & string); - #define macro_f_string_t_destroy_simple(string, length) f_memory_adjust(length, 0, sizeof(f_string_t), (void **) & string); + #define macro_f_string_t_delete_simple(string, length) f_memory_resize(length, 0, sizeof(char), (void **) & string); + #define macro_f_string_t_destroy_simple(string, length) f_memory_adjust(length, 0, sizeof(char), (void **) & string); #define F_string_t_size_d F_number_t_size_positive_d #endif // _di_f_string_t_ diff --git a/level_2/fll_execute/c/private-execute.c b/level_2/fll_execute/c/private-execute.c index 16b8bc9..4890414 100644 --- a/level_2/fll_execute/c/private-execute.c +++ b/level_2/fll_execute/c/private-execute.c @@ -588,7 +588,7 @@ extern "C" { void private_fll_execute_path_arguments_fixate(const f_string_static_t program_path, const f_string_statics_t arguments, const f_string_t last_slash, const bool fixated_is, f_string_static_t program_name, f_string_t fixed_arguments[]) { memset(program_name.string, 0, program_name.used + 1); - memset(fixed_arguments, 0, sizeof(f_string_t) * (arguments.used + 2)); + memset(fixed_arguments, 0, sizeof(char) * (arguments.used + 2)); memcpy(program_name.string, last_slash ? last_slash + 1 : program_path.string, program_name.used); diff --git a/level_2/fll_fss/c/fss.c b/level_2/fll_fss/c/fss.c index 4c0642e..5091ceb 100644 --- a/level_2/fll_fss/c/fss.c +++ b/level_2/fll_fss/c/fss.c @@ -11,7 +11,7 @@ extern "C" { if (!range) return F_status_set_error(F_parameter); #endif // _di_level_2_parameter_checking_ - // skip past all NULLs. + // Skip past all NULLs. for (; range->start <= range->stop; ++range->start) { if (buffer[range->start]) break; } diff --git a/level_2/fll_fss/c/fss_basic.c b/level_2/fll_fss/c/fss_basic.c index f080375..4190573 100644 --- a/level_2/fll_fss/c/fss_basic.c +++ b/level_2/fll_fss/c/fss_basic.c @@ -22,17 +22,15 @@ extern "C" { f_fss_quote_t *quoted_object = 0; do { - if (objects->used == objects->size) { - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, objects); - if (F_status_is_error(status2)) return status2; + status = f_string_ranges_increase(state.step_small, objects); + if (F_status_is_error(status)) return status; - status2 = f_string_rangess_increase(F_fss_default_allocation_step_small_d, contents); - if (F_status_is_error(status2)) return status2; + status = f_string_rangess_increase(state.step_small, contents); + if (F_status_is_error(status)) return status; - if (objects_quoted) { - status2 = f_type_uint8s_increase(F_fss_default_allocation_step_small_d, objects_quoted); - if (F_status_is_error(status2)) return status2; - } + if (objects_quoted) { + status = f_type_uint8s_increase(state.step_small, objects_quoted); + if (F_status_is_error(status)) return status; } do { @@ -51,7 +49,7 @@ extern "C" { ++objects_quoted->used; } - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; @@ -86,7 +84,7 @@ extern "C" { else if (status == F_fss_found_object_content_not) { found_data = F_true; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; break; @@ -177,7 +175,7 @@ extern "C" { if (F_status_is_error(status)) return status; } else { - status = f_string_dynamic_increase(F_fss_default_allocation_step_small_d, destination); + status = f_string_dynamic_increase(state.step_small, destination); if (F_status_is_error(status)) return status; destination->string[destination->used++] = f_string_eol_s.string[0]; diff --git a/level_2/fll_fss/c/fss_basic_list.c b/level_2/fll_fss/c/fss_basic_list.c index 5bcfffd..dee3c52 100644 --- a/level_2/fll_fss/c/fss_basic_list.c +++ b/level_2/fll_fss/c/fss_basic_list.c @@ -20,13 +20,11 @@ extern "C" { bool found_data = F_false; do { - if (objects->used == objects->size) { - macro_f_fss_objects_t_resize(status2, (*objects), objects->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; + status = f_string_ranges_increase(state.step_small, objects); + if (F_status_is_error(status)) return status; - macro_f_fss_contents_t_resize(status2, (*contents), contents->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; - } + status = f_string_rangess_increase(state.step_small, contents); + if (F_status_is_error(status)) return status; do { status = fl_fss_basic_list_object_read(buffer, state, range, &objects->array[objects->used], objects_delimits); @@ -36,7 +34,7 @@ extern "C" { if (status == F_fss_found_object || status == F_fss_found_object_content_not) { ++objects->used; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; @@ -71,7 +69,7 @@ extern "C" { if (status == F_fss_found_object_content_not) { found_data = F_true; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; break; diff --git a/level_2/fll_fss/c/fss_embedded_list.c b/level_2/fll_fss/c/fss_embedded_list.c index 4c3f57f..d0721b3 100644 --- a/level_2/fll_fss/c/fss_embedded_list.c +++ b/level_2/fll_fss/c/fss_embedded_list.c @@ -20,8 +20,8 @@ extern "C" { bool found_data = F_false; if (!nest->used) { - macro_f_fss_nest_t_resize(status2, (*nest), F_fss_default_allocation_step_d); - if (F_status_is_error(status2)) return status2; + status = f_fss_nest_resize(state.step_small, nest); + if (F_status_is_error(status)) return status; } else { initial_used = nest->depth[0].used; @@ -29,10 +29,8 @@ extern "C" { do { do { - if (nest->depth[0].used == nest->depth[0].size) { - macro_f_fss_items_t_resize(status2, nest->depth[0], nest->depth[0].used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; - } + status = f_fss_items_resize(state.step_small, &nest->depth[0]); + if (F_status_is_error(status)) return status; status = fl_fss_embedded_list_object_read(buffer, state, range, &nest->depth[0].array[nest->depth[0].used].object, objects_delimits); if (F_status_is_error(status)) return status; @@ -40,7 +38,7 @@ extern "C" { if (range->start >= range->stop || range->start >= buffer.used) { if (status == F_fss_found_object || status == F_fss_found_object_content_not) { - // extended list requires content closure, so this could be an error. + // Extended list requires content closure, so this could be an error. return F_fss_found_object_content_not; } @@ -64,10 +62,12 @@ extern "C" { found_data = F_true; status = fl_fss_embedded_list_content_read(buffer, state, range, nest, contents_delimits ? contents_delimits : objects_delimits, comments); + break; } else if (status == F_fss_found_object_content_not) { found_data = F_true; + break; } diff --git a/level_2/fll_fss/c/fss_extended.c b/level_2/fll_fss/c/fss_extended.c index 394e5af..1cce8d3 100644 --- a/level_2/fll_fss/c/fss_extended.c +++ b/level_2/fll_fss/c/fss_extended.c @@ -23,22 +23,20 @@ extern "C" { f_fss_quotes_t *quoted_content = 0; do { - if (objects->used == objects->size) { - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, objects); - if (F_status_is_error(status2)) return status2; + status = f_string_ranges_increase(state.step_small, objects); + if (F_status_is_error(status)) return status; - status2 = f_string_rangess_increase(F_fss_default_allocation_step_small_d, contents); - if (F_status_is_error(status2)) return status2; + status = f_string_rangess_increase(state.step_small, contents); + if (F_status_is_error(status)) return status; - if (objects_quoted) { - status2 = f_type_uint8s_increase(F_fss_default_allocation_step_small_d, objects_quoted); - if (F_status_is_error(status2)) return status2; - } + if (objects_quoted) { + status = f_type_uint8s_increase(state.step_small, objects_quoted); + if (F_status_is_error(status)) return status; + } - if (contents_quoted) { - macro_f_fss_quotess_t_increase(status2, F_fss_default_allocation_step_small_d, (*contents_quoted)); - if (F_status_is_error(status2)) return status2; - } + if (contents_quoted) { + status = f_type_uint8ss_increase(state.step_small, contents_quoted); + if (F_status_is_error(status)) return status; } do { @@ -57,13 +55,13 @@ extern "C" { ++objects_quoted->used; } - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; if (contents_quoted) { - status2 = f_type_uint8s_increase(F_fss_default_allocation_step_small_d, &contents_quoted->array[contents_quoted->used]); + status2 = f_type_uint8s_increase(state.step_small, &contents_quoted->array[contents_quoted->used]); if (F_status_is_error(status2)) return status2; ++contents_quoted->used; @@ -91,7 +89,7 @@ extern "C" { found_data = F_true; if (contents_quoted) { - status2 = f_type_uint8s_increase(F_fss_default_allocation_step_small_d, &contents_quoted->array[contents_quoted->used]); + status2 = f_type_uint8s_increase(state.step_small, &contents_quoted->array[contents_quoted->used]); if (F_status_is_error(status2)) return status2; quoted_content = &contents_quoted->array[contents_quoted->used]; diff --git a/level_2/fll_fss/c/fss_extended_list.c b/level_2/fll_fss/c/fss_extended_list.c index 8e778e9..31427f9 100644 --- a/level_2/fll_fss/c/fss_extended_list.c +++ b/level_2/fll_fss/c/fss_extended_list.c @@ -20,13 +20,11 @@ extern "C" { bool found_data = F_false; do { - if (objects->used == objects->size) { - macro_f_fss_objects_t_resize(status2, (*objects), objects->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; + status = f_string_ranges_increase(state.step_small, objects); + if (F_status_is_error(status)) return status; - macro_f_fss_contents_t_resize(status2, (*contents), contents->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; - } + status = f_string_rangess_increase(state.step_small, contents); + if (F_status_is_error(status)) return status; do { status = fl_fss_extended_list_object_read(buffer, state, range, &objects->array[objects->used], objects_delimits); @@ -36,7 +34,7 @@ extern "C" { if (status == F_fss_found_object || status == F_fss_found_object_content_not) { ++objects->used; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; @@ -71,7 +69,7 @@ extern "C" { else if (status == F_fss_found_object_content_not) { found_data = F_true; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; break; diff --git a/level_2/fll_fss/c/fss_payload.c b/level_2/fll_fss/c/fss_payload.c index 82c0afe..6739618 100644 --- a/level_2/fll_fss/c/fss_payload.c +++ b/level_2/fll_fss/c/fss_payload.c @@ -20,13 +20,11 @@ extern "C" { bool found_data = F_false; do { - if (objects->used == objects->size) { - macro_f_fss_objects_t_resize(status2, (*objects), objects->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; + status = f_string_ranges_increase(state.step_small, objects); + if (F_status_is_error(status)) return status; - macro_f_fss_contents_t_resize(status2, (*contents), contents->used + F_fss_default_allocation_step_d); - if (F_status_is_error(status)) return status; - } + status = f_string_rangess_increase(state.step_small, contents); + if (F_status_is_error(status)) return status; do { status = fl_fss_basic_list_object_read(buffer, state, range, &objects->array[objects->used], objects_delimits); @@ -45,7 +43,7 @@ extern "C" { ++objects->used; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; @@ -73,7 +71,7 @@ extern "C" { found_data = F_true; if (fl_string_dynamic_partial_compare_string(f_fss_string_payload_s.string, buffer, f_fss_string_payload_s.used, objects->array[objects->used]) == F_equal_to) { - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status)) return status; contents->array[contents->used].used = 1; @@ -104,13 +102,13 @@ extern "C" { else if (status == F_fss_found_object_content_not) { found_data = F_true; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; if (fl_string_dynamic_partial_compare_string(f_fss_string_payload_s.string, buffer, f_fss_string_payload_s.used, objects->array[objects->used]) == F_equal_to) { ++objects->used; - status2 = f_string_ranges_increase(F_fss_default_allocation_step_small_d, &contents->array[contents->used]); + status2 = f_string_ranges_increase(state.step_small, &contents->array[contents->used]); if (F_status_is_error(status2)) return status2; ++contents->used; -- 1.8.3.1