One of the original goals of the FLL project is to achieve fail-through functionality.
Knowing that this is a lot of work, I have ignored many situations where I could have implemented fail-through and instead simply performed fail-out or fail-over.
With the upcoming stable release, I believe that this must handle bad data files.
This adds the option to conditionally change the behavior between fail-through and fail-out for f_iki_read() and related functions when invalid UTF-8 code sequences are encountered.
The default behavior is now changed from fail-out to fail-through.
do {
// Find the start of the vocabulary name.
- while (F_status_is_error_not(status) && range->start <= range->stop && range->start < buffer->used) {
+ while (range->start <= range->stop && range->start < buffer->used) {
if (state.interrupt) {
status = state.interrupt((void *) &state, 0);
width_max = buffer->used - range->start;
status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
- if (F_status_is_error(status)) break;
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_utf_fragment || F_status_set_fine(status) == F_complete_not_utf) {
+ if (state.flag & f_iki_state_flag_utf_fail_on_valid_not_e) {
+ break;
+ }
+
+ status = F_false;
+ }
+ else {
+ break;
+ }
+ }
if (status == F_true) {
found_vocabulary.start = range->start++;
}
status = f_utf_buffer_increment(*buffer, range, 1);
+ if (F_status_is_error(status)) break;
} // while
// Find the end of the vocabulary name.
width_max = buffer->used - range->start;
status = f_utf_is_word_dash_plus(buffer->string + range->start, width_max, F_false);
- if (F_status_is_error(status)) break;
+
+ if (F_status_is_error(status)) {
+ if (F_status_set_fine(status) == F_utf_fragment || F_status_set_fine(status) == F_complete_not_utf) {
+ if (state.flag & f_iki_state_flag_utf_fail_on_valid_not_e) {
+ break;
+ }
+
+ status = F_false;
+ }
+ else {
+ break;
+ }
+ }
// Not a valid IKI vocabulary name.
if (status != F_true) break;
} while (range->start <= range->stop && range->start < buffer->used);
+ if (F_status_set_fine(status) == F_complete_not_utf_eos || F_status_set_fine(status) == F_complete_not_utf_stop) {
+ if (!(state.flag & f_iki_state_flag_utf_fail_on_valid_not_e)) {
+ status = F_status_set_fine(status);
+ }
+ }
+
if (F_status_is_error(status)) {
data->delimits.used = delimits_used;
* Calling this more than once on the same buffer range could result in multiple escaping.
*
* @param state
- * A state for handling interrupts during long running operations.
- * There is no print_error() usage at this time (@todo this should be implemented and supported).
+ * A state for providing flags and handling interrupts during long running operations.
+ * There is no print_error().
* There is no functions structure.
- * There is no data structure passed to these functions (@todo the additional parameters could be moved to a custom structure).
+ * There is no data structure passed to these functions.
*
* When interrupt() returns, only F_interrupt and F_interrupt_not are processed.
* Error bit designates an error but must be passed along with F_interrupt.
* F_none on success and an IKI vocabulary name was found.
* F_none_eos on success and an IKI vocabulary name was found and end of string was reached.
* F_none_stop on success and an IKI vocabulary name was found and stop point was reached.
+ * F_complete_not_utf_eos on success but string ended on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is not set.
+ * F_complete_not_utf_stop on success but stop point reached on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is not set.
* F_data_not on success, but there were no IKI vocabulary names found.
* F_data_not_eos on success and EOS was reached, but there were no IKI vocabulary names found.
* F_data_not_stop on success and stop point was reached, but there were no IKI vocabulary names found.
*
+ * F_complete_not_utf_eos (with error bit) if the string ended on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is set.
+ * F_complete_not_utf_stop (with error bit) if the stop point was reached on incomplete UTF-8 and f_iki_state_flag_utf_fail_on_valid_not_e is set.
* F_interrupt (with error bit) if stopping due to an interrupt.
* F_memory_not (with error bit) on out of memory.
* F_parameter (with error bit) if a parameter is invalid.
#endif
/**
+ * State flags associated with iki functions.
+ *
+ * These flags are meant to be bitwise for the 32-bit f_state_t flag property.
+ *
+ * The f_iki_state_flag_none_e is expected to be 0, therefore it must be safe to use 0 directly.
+ *
+ * f_iki_state_flag_*:
+ * - none: No flags are set.
+ * - utf_fail_on_valid_not: Immediately fail on an invalid UTF-8 character (including incomplete sequences).
+ */
+#ifndef _di_f_iki_state_flags_
+ enum {
+ f_iki_state_flag_none_e = 0,
+ f_iki_state_flag_utf_fail_on_valid_not_e = 0x1,
+ }; // enum
+#endif // _di_f_iki_state_flags_
+
+/**
* IKI-specific syntax.
*/
#ifndef _di_f_iki_syntax_
status = f_iki_read(state, buffer, range, data);
if (F_status_is_error(status)) return status;
- if (status == F_data_not_eos || status == F_data_not_stop) {
- return status;
- }
-
- if (status == F_none_eos || status == F_none_stop) {
- return status;
- }
-
} while (range->start <= range->stop && range->start < buffer->used);
- return F_none;
+ return status;
}
#endif // _di_fl_iki_read_
* This only finds complete vocabulary names and their respective content.
*
* @param state
- * A state for handling interrupts during long running operations.
- * There is no print_error() usage at this time (@todo this should be implemented and supported).
- * There is no functions structure.
- * There is no data structure passed to these functions (@todo the additional parameters could be moved to a custom structure).
- *
- * When interrupt() returns, only F_interrupt and F_interrupt_not are processed.
- * Error bit designates an error but must be passed along with F_interrupt.
- * All other statuses are ignored.
+ * A state for providing flags and handling interrupts during long running operations.
* @param buffer
* The string to process.
* @param range
* F_none on success and an IKI vocabulary name was found.
* F_none_stop on success and an IKI vocabulary name was found and stop point was reached.
* F_none_eos on success and an IKI vocabulary name was found and end of string was reached.
+ * F_complete_not_utf_eos on success and EOS was reached, but at an incomplete UTF-8 sequence.
+ * F_complete_not_utf_stop on success and stop point was reached, but at an incomplete UTF-8 sequence.
* F_data_not_eos on success and EOS was reached, but there were no IKI vocabulary names found.
* F_data_not_stop on success and stop point was reached, but there were no IKI vocabulary names found.
*
if (size_file > iki_read_block_max) {
file.size_read = iki_read_block_read_large;
size_block = iki_read_block_max;
-
- // Pre-allocate entire file buffer plus space for the terminating NULL.
- f_string_dynamic_increase_by(size_file + (size_block - (size_file % size_block)) + 1, &data.buffer);
}
else {
file.size_read = iki_read_block_read_small;
size_block = size_file;
-
- // Pre-allocate entire file buffer plus space for the terminating NULL.
- f_string_dynamic_increase_by(size_file + 1, &data.buffer);
}
+ // Pre-allocate entire file buffer plus space for the terminating NULL.
+ f_string_dynamic_increase_by(size_file + 1, &data.buffer);
+
if (F_status_is_error(status)) {
fll_error_file_print(main->error, F_status_set_fine(status), "f_string_dynamic_resize", F_true, data.argv[main->parameters.remaining.array[i]], f_file_operation_process_s, fll_error_file_type_file_e);
status = iki_read_process_at(data, &buffer_range);
- if (status == F_true && buffer_range.start > data->buffer.used || status == F_data_not) {
+ if ((status == F_true && buffer_range.start > data->buffer.used) || status == F_data_not) {
f_iki_data_delete(&iki_data);
return F_data_not;