Work relating to UTF-8 file related functions.
I also noticed that the *_delete_simple() and *_destroy_simple() macros are missing for the UTF-8 dynamic strings.
dynamic.used = 0; \
}
+ /**
+  * Release the string held by a UTF-8 dynamic string through f_memory_delete() and reset its size and used counts to 0.
+  *
+  * The status returned by f_memory_delete() is discarded.
+  *
+  * This expands to several statements and evaluates its parameter more than once, so do not use it as the body of an unbraced if/else and do not pass an expression with side effects.
+  *
+  * dynamic: the f_utf_string_dynamic structure to operate on (the parameter is parenthesized, so expressions such as *pointer are accepted).
+  */
+ #define f_macro_utf_string_dynamic_delete_simple(dynamic) \
+   f_memory_delete((void **) & (dynamic).string, sizeof(f_utf_string), (dynamic).size); \
+   (dynamic).size = 0; \
+   (dynamic).used = 0;
+
+ /**
+  * Release the string held by a UTF-8 dynamic string through f_memory_destroy() and reset its size and used counts to 0.
+  *
+  * The status returned by f_memory_destroy() is discarded.
+  *
+  * This expands to several statements and evaluates its parameter more than once, so do not use it as the body of an unbraced if/else and do not pass an expression with side effects.
+  *
+  * dynamic: the f_utf_string_dynamic structure to operate on (the parameter is parenthesized, so expressions such as *pointer are accepted).
+  */
+ #define f_macro_utf_string_dynamic_destroy_simple(dynamic) \
+   f_memory_destroy((void **) & (dynamic).string, sizeof(f_utf_string), (dynamic).size); \
+   (dynamic).size = 0; \
+   (dynamic).used = 0;
+
#define f_macro_utf_string_dynamic_resize(status, dynamic, new_length) \
status = f_memory_resize((void **) & dynamic.string, sizeof(f_utf_string), dynamic.size, new_length); \
if (status == F_none) { \
if (status == F_none) status = f_memory_destroy((void **) & dynamics.array, sizeof(f_utf_string_dynamic), dynamics.size); \
if (status == F_none) dynamics.used = 0;
+ /**
+  * Release every UTF-8 dynamic string in dynamics.array (all dynamics.size slots, last to first) and then the array itself.
+  *
+  * Each element is an f_utf_string_dynamic, so the UTF-8 variant of the element macro must be used; the non-UTF-8 variant operates on a different string type.
+  *
+  * On completion dynamics.used is 0.
+  * When dynamics.size is 0 on entry nothing is released.
+  *
+  * This expands to several statements and evaluates its parameter more than once, so do not use it as the body of an unbraced if/else and do not pass an expression with side effects.
+  *
+  * NOTE(review): dynamics.size is only reset when f_memory_delete() returns a non-zero value - confirm this matches the value returned on success.
+  *
+  * dynamics: the f_utf_string_dynamics structure to operate on.
+  */
+ #define f_macro_utf_string_dynamics_delete_simple(dynamics) \
+   (dynamics).used = (dynamics).size; \
+   while ((dynamics).used > 0) { \
+     (dynamics).used--; \
+     f_macro_utf_string_dynamic_delete_simple((dynamics).array[(dynamics).used]); \
+     if ((dynamics).used == 0) { \
+       if (f_memory_delete((void **) & (dynamics).array, sizeof(f_utf_string_dynamic), (dynamics).size)) { \
+         (dynamics).size = 0; \
+       } \
+     } \
+   }
+
+ /**
+  * Destroy every UTF-8 dynamic string in dynamics.array (all dynamics.size slots, last to first) and then the array itself.
+  *
+  * Each element is an f_utf_string_dynamic, so the UTF-8 variant of the element macro must be used; the non-UTF-8 variant operates on a different string type.
+  *
+  * On completion dynamics.used is 0.
+  * When dynamics.size is 0 on entry nothing is released.
+  *
+  * This expands to several statements and evaluates its parameter more than once, so do not use it as the body of an unbraced if/else and do not pass an expression with side effects.
+  *
+  * NOTE(review): dynamics.size is only reset when f_memory_destroy() returns a non-zero value - confirm this matches the value returned on success.
+  *
+  * dynamics: the f_utf_string_dynamics structure to operate on.
+  */
+ #define f_macro_utf_string_dynamics_destroy_simple(dynamics) \
+   (dynamics).used = (dynamics).size; \
+   while ((dynamics).used > 0) { \
+     (dynamics).used--; \
+     f_macro_utf_string_dynamic_destroy_simple((dynamics).array[(dynamics).used]); \
+     if ((dynamics).used == 0) { \
+       if (f_memory_destroy((void **) & (dynamics).array, sizeof(f_utf_string_dynamic), (dynamics).size)) { \
+         (dynamics).size = 0; \
+       } \
+     } \
+   }
+
#define f_macro_utf_string_dynamics_resize(status, dynamics, new_length) \
status = F_none; \
if (new_length < dynamics.size) { \
extern "C" {
#endif
+#if !defined(fl_utf_file_read) || !defined(fl_utf_file_read_until) || !defined(fl_utf_file_read_range)
+ /**
+  * Convert a block of bytes just read from a file into 4-byte wide UTF-8 characters appended to buffer.
+  *
+  * A multi-byte character may be split across two reads.
+  * The bytes of the character in progress are therefore cached in buffer_char, and width and width_last carry the progress from one call to the next.
+  *
+  * No resizing is performed here: buffer->string must already have room for every character completed by this call (at most size_read of them).
+  *
+  * buffer_read: the bytes that were read.
+  * size_read:   the number of bytes in buffer_read.
+  * buffer:      the destination; one character is stored at buffer->string[buffer->used] and buffer->used is incremented for each completed character.
+  * buffer_char: a 4 byte cache holding the bytes of the character in progress.
+  * width:       the byte width of the character in progress, 0 when none is in progress; non-zero on return means the read ended in the middle of a character.
+  * width_last:  the number of bytes of the character in progress already stored in buffer_char.
+  */
+ void private_fl_utf_file_process_read_buffer(const char *buffer_read, const ssize_t size_read, f_utf_string_dynamic *buffer, char buffer_char[], uint8_t *width, int8_t *width_last) {
+   f_string_length i = 0;
+
+   while (i < size_read) {
+
+     if (*width == 0) {
+
+       // Not inside a character, so this byte is a first byte and determines the width.
+       *width = f_macro_utf_byte_width(buffer_read[i]);
+       *width_last = 0;
+
+       // buffer_char holds 4 bytes, so skip a byte whose reported width cannot be cached instead of stalling or overflowing.
+       // NOTE(review): f_macro_utf_byte_width() presumably always yields 1 to 4, making this unreachable - confirm.
+       if (*width == 0 || *width > 4) {
+         *width = 0;
+         i++;
+
+         continue;
+       }
+     }
+     else if (*width_last < 0) {
+
+       // Never index buffer_char with the initial -1 used by the callers.
+       *width_last = 0;
+     }
+
+     // Resume at the first byte not yet cached and stop early if the read buffer runs out.
+     while (*width_last < *width && i < size_read) {
+       buffer_char[*width_last] = buffer_read[i];
+       (*width_last)++;
+       i++;
+     } // while
+
+     // Store the character only once all of its bytes are cached, otherwise width stays non-zero so that the next call continues it.
+     if (*width_last >= *width) {
+       buffer->string[buffer->used] = f_macro_utf_character_from_char_1((buffer_char[0]));
+
+       if (*width > 1) {
+         buffer->string[buffer->used] |= f_macro_utf_character_from_char_2((buffer_char[1]));
+
+         if (*width > 2) {
+           buffer->string[buffer->used] |= f_macro_utf_character_from_char_3((buffer_char[2]));
+
+           if (*width > 3) {
+             buffer->string[buffer->used] |= f_macro_utf_character_from_char_4((buffer_char[3]));
+           }
+         }
+       }
+
+       buffer->used++;
+       *width = 0;
+     }
+   } // while
+ }
+#endif // !defined(fl_utf_file_read) || !defined(fl_utf_file_read_until) || !defined(fl_utf_file_read_range)
+
#if !defined(fl_utf_file_write) || !defined(fl_utf_file_write_until) || !defined(fl_utf_file_write_range)
f_return_status private_fl_utf_file_write_until(const f_file file, const f_utf_string string, const f_utf_string_length total, f_utf_string_length *written) {
*written = 0;
uint8_t width = 0;
uint8_t width_written = 0;
+ // @todo this needs to identify an invalid UTF-8 string before writing and return an error if invalid.
+
do {
memset(buffer_write, 0, write_size);
#endif
/**
+ * Special function for converting read buffer from 1-byte wide character buffer to a UTF-8 4-byte wide utf_character buffer.
+ *
+ * Intended to be shared to each of the different implementation variations.
+ *
+ * @param buffer_read
+ * The read buffer to process.
+ * @param size_read
+ * The size of the read buffer.
+ * @param buffer
+ * The UTF-8 4-byte wide utf_character buffer.
+ * @param buffer_char
+ * A 1-byte wide character array of size 4 to cache the current buffer_read until all parts are read to store into buffer.
+ * @param width
+ * The width of the character currently being read, as determined from its first byte (0 when no character is in progress).
+ * This is used to determine how much of buffer_char is to be filled before saving into buffer.
+ * @param width_last
+ * The amount of width filled into buffer_char after executing this function.
+ * This may be something other than width when size_read is reached before the entire buffer_char is filled according to width.
+ *
+ * @see fl_utf_file_read()
+ * @see fl_utf_file_read_range()
+ * @see fl_utf_file_read_until()
+ */
+#if !defined(fl_utf_file_read) || !defined(fl_utf_file_read_until) || !defined(fl_utf_file_read_range)
+ void private_fl_utf_file_process_read_buffer(const char *buffer_read, const ssize_t size_read, f_utf_string_dynamic *buffer, char buffer_char[], uint8_t *width, int8_t *width_last) f_gcc_attribute_visibility_internal;
+#endif // !defined(fl_utf_file_read) || !defined(fl_utf_file_read_until) || !defined(fl_utf_file_read_range)
+
+/**
* Private implementation of fl_utf_file_write_until().
*
* Intended to be shared to each of the different implementation variations.
*
* @return
* F_none on success.
- * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_none_eos on success but range.stop exceeded buffer.used (only wrote up to buffer.used).
- * F_parameter (with error bit) if a parameter is invalid.
+ * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_block (with error bit) if file descriptor is set to non-block and the write would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see fl_utf_file_write()
* @see fl_utf_file_write_range()
ssize_t size_read = 0;
uint8_t width = 0;
-
- f_utf_string_length i = 0;
- f_utf_character character = 0;
+ int8_t width_last = -1;
char buffer_read[file.size_read];
+ char buffer_char[4] = { 0, 0, 0, 0 };
memset(&buffer_read, 0, sizeof(file.size_read));
while ((size_read = read(file.id, buffer_read, file.size_read)) > 0) {
if (buffer->used + size_read > buffer->size) {
- if (buffer->size + size_read > f_string_length_size) {
+ if (buffer->size + size_read > f_utf_string_length_size) {
return F_status_set_error(F_string_too_large);
}
if (F_status_is_error(status)) return status;
}
- for (i = 0; i < size_read; i += width) {
- width = f_macro_utf_byte_width(buffer_read[i]);
-
- // @fixme this needs to properly validate the UTF-8 width available and also carry ove the count across the outer loop.
-
- character = f_macro_utf_character_from_char_1(buffer_read[i]);
-
- if (width > 1 && i + 1 < size_read) {
- character |= f_macro_utf_character_from_char_2(buffer_read[i]);
-
- if (width > 2 && i + 2 < size_read) {
- character |= f_macro_utf_character_from_char_3(buffer_read[i]);
-
- if (width > 3 && i + 3 < size_read) {
- character |= f_macro_utf_character_from_char_4(buffer_read[i]);
- }
- }
- }
-
- buffer->string[i] = character;
- buffer->used++;
- } // for
+ private_fl_utf_file_process_read_buffer(buffer_read, size_read, buffer, buffer_char, &width, &width_last);
} // while
if (size_read == 0) {
+ if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_eof);
+ }
+
return F_none_eof;
}
-
- if (size_read < 0) {
+ else if (size_read < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return F_status_set_error(F_block);
if (errno == EBADF) return F_status_set_error(F_file_descriptor);
if (errno == EFAULT) return F_status_set_error(F_buffer);
return F_status_set_error(F_failure);
}
+ else if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_stop);
+ }
return F_none;
}
ssize_t size_read = 0;
uint8_t width = 0;
-
- f_utf_string_length i = 0;
- f_utf_character character = 0;
+ int8_t width_last = -1;
char buffer_read[file.size_read];
+ char buffer_char[4] = { 0, 0, 0, 0 };
memset(&buffer_read, 0, sizeof(file.size_read));
if (F_status_is_error(status)) return status;
}
- for (i = 0; i < size_read; i += width) {
- width = f_macro_utf_byte_width(buffer_read[i]);
-
- // @fixme this needs to properly validate the UTF-8 width available and also carry ove the count across the outer loop.
-
- character = f_macro_utf_character_from_char_1(buffer_read[i]);
-
- if (width > 1 && i + 1 < size_read) {
- character |= f_macro_utf_character_from_char_2(buffer_read[i]);
-
- if (width > 2 && i + 2 < size_read) {
- character |= f_macro_utf_character_from_char_3(buffer_read[i]);
-
- if (width > 3 && i + 3 < size_read) {
- character |= f_macro_utf_character_from_char_4(buffer_read[i]);
- }
- }
- }
-
- buffer->string[i] = character;
- buffer->used++;
- } // for
+ private_fl_utf_file_process_read_buffer(buffer_read, size_read, buffer, buffer_char, &width, &width_last);
}
if (size_read == 0) {
+ if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_eof);
+ }
+
return F_none_eof;
}
-
- if (size_read < 0) {
+ else if (size_read < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return F_status_set_error(F_block);
if (errno == EBADF) return F_status_set_error(F_file_descriptor);
if (errno == EFAULT) return F_status_set_error(F_buffer);
return F_status_set_error(F_failure);
}
+ else if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_stop);
+ }
return F_none;
}
ssize_t size_read = 0;
uint8_t width = 0;
-
- f_utf_string_length i = 0;
- f_utf_character character = 0;
+ int8_t width_last = -1;
f_utf_string_length buffer_size = file.size_read;
f_utf_string_length buffer_count = 0;
}
char buffer_read[buffer_size];
+ char buffer_char[4] = { 0, 0, 0, 0 };
memset(&buffer_read, 0, sizeof(buffer_size));
if (F_status_is_error(status)) return status;
}
- for (i = 0; i < size_read; i += width) {
- width = f_macro_utf_byte_width(buffer_read[i]);
-
- // @fixme this needs to properly validate the UTF-8 width available and also carry ove the count across the outer loop.
-
- character = f_macro_utf_character_from_char_1(buffer_read[i]);
-
- if (width > 1 && i + 1 < total) {
- character |= f_macro_utf_character_from_char_2(buffer_read[i]);
-
- if (width > 2 && i + 2 < total) {
- character |= f_macro_utf_character_from_char_3(buffer_read[i]);
-
- if (width > 3 && i + 3 < total) {
- character |= f_macro_utf_character_from_char_4(buffer_read[i]);
- }
- }
- }
-
- buffer->string[i] = character;
- buffer->used++;
- buffer_count++;
- } // for
+ private_fl_utf_file_process_read_buffer(buffer_read, size_read, buffer, buffer_char, &width, &width_last);
} // while
if (size_read == 0) {
+ if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_eof);
+ }
+
return F_none_eof;
}
-
- if (size_read < 0) {
+ else if (size_read < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return F_status_set_error(F_block);
if (errno == EBADF) return F_status_set_error(F_file_descriptor);
if (errno == EFAULT) return F_status_set_error(F_buffer);
return F_status_set_error(F_failure);
}
+ else if (width != 0) {
+ return F_status_set_error(F_incomplete_utf_stop);
+ }
return F_none;
}
return F_data_not;
}
- f_status status = private_fl_utf_file_write_until(file, buffer.string, buffer.used, written);
+ const f_status status = private_fl_utf_file_write_until(file, buffer.string, buffer.used, written);
if (F_status_is_error(status)) return F_status_set_error(status);
if (status == F_none && *written == buffer.used) return F_none_eos;
write_max = buffer.used;
}
- f_status status = private_fl_utf_file_write_until(file, buffer.string, write_max, written);
+ const f_status status = private_fl_utf_file_write_until(file, buffer.string, write_max, written);
if (F_status_is_error(status)) return F_status_set_error(status);
if (status == F_none && *written == buffer.used) return F_none_eos;
write_max = buffer.used;
}
- f_status status = private_fl_utf_file_write_until(file, buffer.string, write_max, written);
+ const f_status status = private_fl_utf_file_write_until(file, buffer.string, write_max, written);
if (F_status_is_error(status)) return F_status_set_error(status);
if (status == F_none && *written == buffer.used) return F_none_eos;
return F_data_not;
}
- // @todo consider adding a custom status return for when an invalid UTF-8 is written due to range limitations.
-
const f_utf_string_length total = (range.stop - range.start) + 1;
f_utf_string_length write_max = total;
write_max = buffer.used;
}
- f_status status = private_fl_utf_file_write_until(file, buffer.string + range.start, write_max, written);
+ const f_status status = private_fl_utf_file_write_until(file, buffer.string + range.start, write_max, written);
if (F_status_is_error(status)) return F_status_set_error(status);
if (status == F_none) {
* API Version: 0.5
* Licenses: lgplv2.1
*
- * UTF-8 File Operations.
+ * UTF-8 File I/O Operations.
*/
#ifndef _FL_utf_file_h
#define _FL_utf_file_h
* @return
* F_none on success.
* F_none_eof on success and EOF was reached.
- * F_parameter (with error bit) if a parameter is invalid.
* F_block (with error bit) if file descriptor is set to non-block and the read would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
* F_incomplete_utf_eof (with error bit) if UTF-8 character was incomplete at the end of the file.
+ * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at a stop point (file.size_read).
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see read()
*/
* @return
* F_none on success.
* F_none_eof on success and EOF was reached.
- * F_parameter (with error bit) if a parameter is invalid.
* F_block (with error bit) if file descriptor is set to non-block and the read would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
* F_incomplete_utf_eof (with error bit) if UTF-8 character was incomplete at the end of the file.
+ * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at a stop point (file.size_read).
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see read()
*/
-#ifndef _di_fl_utf_file_read_
+#ifndef _di_fl_utf_file_read_block_
extern f_return_status fl_utf_file_read_block(const f_file file, f_utf_string_dynamic *buffer);
-#endif // _di_fl_utf_file_read_
+#endif // _di_fl_utf_file_read_block_
/**
* Read until a given number or EOF is reached, storing it in the buffer.
* @return
* F_none on success.
* F_none_eof on success and EOF was reached.
- * F_parameter (with error bit) if a parameter is invalid.
* F_block (with error bit) if file descriptor is set to non-block and the read would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
* F_incomplete_utf_eof (with error bit) if UTF-8 character was incomplete at the end of the file.
+ * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at a stop point (file.size_read).
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see read
*/
*
* @return
* F_none on success.
+ * F_none_eos on success but range.stop exceeded buffer.used (only wrote up to buffer.used).
* F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
- * F_parameter (with error bit) if a parameter is invalid.
* F_block (with error bit) if file descriptor is set to non-block and the write would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
- * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at the stop location.
- * F_incomplete_utf_eos (with error bit) if UTF-8 character was incomplete at the end of the string.
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see write()
*/
*
* @return
* F_none on success.
+ * F_none_eos on success but range.stop exceeded buffer.used (only wrote up to buffer.used).
* F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
- * F_parameter (with error bit) if a parameter is invalid.
* F_block (with error bit) if file descriptor is set to non-block and the write would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
- * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at the stop location.
- * F_incomplete_utf_eos (with error bit) if UTF-8 character was incomplete at the end of the string.
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see write()
*/
*
* @return
* F_none on success.
- * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_none_eos on success but range.stop exceeded buffer.used (only wrote up to buffer.used).
- * F_parameter (with error bit) if a parameter is invalid.
+ * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_block (with error bit) if file descriptor is set to non-block and the write would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
- * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at the stop location.
- * F_incomplete_utf_eos (with error bit) if UTF-8 character was incomplete at the end of the string.
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see write()
*/
*
* @return
* F_none on success.
- * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_none_eos on success but range.stop exceeded buffer.used (only wrote up to buffer.used).
- * F_parameter (with error bit) if a parameter is invalid.
+ * F_none_stop on success but no data was written (written == 0) (not an error and often happens if file type is not a regular file).
* F_block (with error bit) if file descriptor is set to non-block and the write would result in a blocking operation.
- * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_buffer (with error bit) if the buffer is invalid.
- * F_interrupted (with error bit) if interrupt was received.
- * F_input_output (with error bit) on I/O error.
* F_file (with error bit) if file descriptor is in an error state.
* F_file_closed (with error bit) if file is not open.
+ * F_file_descriptor (with error bit) if the file descriptor is invalid.
* F_file_type_directory (with error bit) if file descriptor represents a directory.
- * F_incomplete_utf_stop (with error bit) if UTF-8 character was incomplete at the stop location.
- * F_incomplete_utf_eos (with error bit) if UTF-8 character was incomplete at the end of the string.
+ * F_input_output (with error bit) on I/O error.
+ * F_interrupted (with error bit) if interrupt was received.
+ * F_parameter (with error bit) if a parameter is invalid.
*
* @see write()
*/