Kevux Git Server - fll/commitdiff
Feature: add fl_utf_file project
author: Kevin Day <thekevinday@gmail.com>
Mon, 4 May 2020 11:47:38 +0000 (06:47 -0500)
committer: Kevin Day <thekevinday@gmail.com>
Mon, 4 May 2020 11:50:29 +0000 (06:50 -0500)
This is intended to be an f_utf_character handling version of the fl_file project.
An f_utf_character is a 4-byte wide storage for a single UTF-8 character.

I accidentally committed the utf_file build settings in a previous commit: f69e57a.

level_1/fl_utf_file/c/utf_file.c [new file with mode: 0644]
level_1/fl_utf_file/c/utf_file.h [new file with mode: 0644]
level_1/fl_utf_file/data/build/dependencies [new file with mode: 0644]
level_1/fl_utf_file/data/build/settings [new file with mode: 0644]

diff --git a/level_1/fl_utf_file/c/utf_file.c b/level_1/fl_utf_file/c/utf_file.c
new file mode 100644 (file)
index 0000000..7118745
--- /dev/null
@@ -0,0 +1,281 @@
+#include <level_1/utf_file.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _di_fl_utf_file_read_
+  // Read from the current position of the open file until EOF, one pass of (file->size_block * file->size_chunk) bytes at a time.
+  // Each 1 to 4 byte sequence is packed into a single f_utf_character and appended to buffer at buffer->used.
+  // Only the first byte of a sequence is inspected (to get the width), the sequences themselves are not validated.
+  // Returns f_none_on_eof once EOF is reached, otherwise a status with the error bit set.
+  f_return_status fl_utf_file_read(f_file *file, f_utf_string_dynamic *buffer) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer->used >= buffer->size) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    f_status status = f_none;
+
+    f_string_length i = 0;
+    f_string_length total = 0;
+
+    int result = 0;
+    const f_number_unsigned bytes_total = file->size_block * file->size_chunk;
+
+    uint8_t width = 0;
+    uint8_t buffer_read[bytes_total];
+    f_utf_character character = 0;
+
+    for (;;) {
+      // a pass produces at most one character per byte read, so bytes_total free slots are always enough.
+      if (buffer->used + bytes_total > buffer->size) {
+        if (buffer->used + bytes_total > f_utf_string_max_size) return f_status_set_error(f_string_too_large);
+
+        // NOTE(review): this is the f_string resize macro being applied to an f_utf_string_dynamic, confirm that it allocates sizeof(f_utf_character) per element.
+        f_macro_string_dynamic_resize(status, (*buffer), buffer->used + bytes_total);
+
+        if (f_status_is_error(status)) return status;
+      }
+
+      memset(buffer_read, 0, bytes_total);
+
+      // the raw bytes are staged in buffer_read, the caller's buffer only ever receives decoded characters.
+      result = fread(buffer_read, file->size_chunk, file->size_block, file->address);
+
+      if (file->address == 0) return f_status_set_error(f_file_error_read);
+      if (ferror(file->address) != 0) return f_status_set_error(f_file_error_read);
+
+      total = result * file->size_chunk;
+
+      // NOTE(review): this loop assumes f_macro_utf_byte_width() never returns 0, otherwise it would not advance.
+      for (i = 0; i < total; i += width) {
+        width = f_macro_utf_byte_width(buffer_read[i]);
+
+        // NOTE(review): a sequence that is split across two passes is also reported here, even when the file has more data.
+        if (i + width > total) return f_status_set_error(f_incomplete_utf_on_eof);
+
+        character = f_macro_utf_character_from_char_1(buffer_read[i]);
+
+        // each following byte of the sequence is taken from its own position in the staged data.
+        if (width > 1 && i + 1 < total) {
+          character |= f_macro_utf_character_from_char_2(buffer_read[i + 1]);
+
+          if (width > 2 && i + 2 < total) {
+            character |= f_macro_utf_character_from_char_3(buffer_read[i + 2]);
+
+            if (width > 3 && i + 3 < total) {
+              character |= f_macro_utf_character_from_char_4(buffer_read[i + 3]);
+            }
+          }
+        }
+
+        // append, 'i' is a byte offset into buffer_read and not an index into the caller's buffer.
+        buffer->string[buffer->used] = character;
+        buffer->used++;
+      } // for
+
+      if (feof(file->address)) return f_none_on_eof;
+    } // for
+
+    return status;
+  }
+#endif // _di_fl_utf_file_read_
+
+#ifndef _di_fl_utf_file_read_position_
+  // Seek the open file relative to its current position so that reading begins at position.start, then read and decode into buffer.
+  // When position.total is 0, passes of (file->size_block * file->size_chunk) bytes are read until EOF is reached.
+  // Otherwise a single pass of (position.total * file->size_chunk) bytes is read.
+  // Each 1 to 4 byte sequence is packed into a single f_utf_character and appended to buffer at buffer->used, the sequences are not validated.
+  f_return_status fl_utf_file_read_position(f_file *file, f_utf_string_dynamic *buffer, const f_file_position position) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (buffer == 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    int result = 0;
+
+    // first seek to 'where' we need to begin the read.
+    {
+      long current_file_position = ftell(file->address);
+
+      if (current_file_position == -1) return f_status_set_error(f_file_error_seek);
+
+      // NOTE(review): both branches pass a distance from the current position, so f_macro_file_seek_to() is presumably a relative seek, confirm against its definition.
+      if (current_file_position > position.start) {
+        result = f_macro_file_seek_to(file->address, file->size_chunk * (0 - (current_file_position - position.start)));
+      }
+      else if (current_file_position < position.start) {
+        result = f_macro_file_seek_to(file->address, file->size_chunk * (position.start - current_file_position));
+      }
+
+      if (result != 0) return f_status_set_error(f_file_error_seek);
+    }
+
+    f_status status = f_none;
+    bool infinite = f_false;
+
+    f_string_length i = 0;
+    f_string_length total = 0;
+
+    f_number_unsigned bytes_total;
+
+    // when total is 0, this means the file read will continue until EOF is reached.
+    if (position.total == 0) {
+      infinite = f_true;
+      bytes_total = file->size_block * file->size_chunk;
+    }
+    else {
+      bytes_total = position.total * file->size_chunk;
+    }
+
+    uint8_t width = 0;
+    uint8_t buffer_read[bytes_total];
+    f_utf_character character = 0;
+
+    do {
+      // a pass produces at most one character per byte read, so bytes_total free slots are always enough.
+      if (buffer->used + bytes_total > buffer->size) {
+        if (buffer->used + bytes_total > f_utf_string_max_size) return f_status_set_error(f_string_too_large);
+
+        // NOTE(review): this is the f_string resize macro being applied to an f_utf_string_dynamic, confirm that it allocates sizeof(f_utf_character) per element.
+        f_macro_string_dynamic_resize(status, (*buffer), buffer->used + bytes_total);
+
+        if (f_status_is_error(status)) return status;
+      }
+
+      // the raw bytes are staged in buffer_read, the caller's buffer only ever receives decoded characters.
+      if (position.total == 0) {
+        result = fread(buffer_read, file->size_chunk, file->size_block, file->address);
+      }
+      else {
+        result = fread(buffer_read, file->size_chunk, position.total, file->address);
+      }
+
+      if (file->address == 0) return f_status_set_error(f_file_error_read);
+      if (ferror(file->address) != 0) return f_status_set_error(f_file_error_read);
+
+      // only the bytes actually read are decoded, the rest of buffer_read is never looked at.
+      total = result * file->size_chunk;
+
+      // NOTE(review): this loop assumes f_macro_utf_byte_width() never returns 0, otherwise it would not advance.
+      for (i = 0; i < total; i += width) {
+        width = f_macro_utf_byte_width(buffer_read[i]);
+
+        // NOTE(review): a sequence that is split across two passes is also reported here, even when the file has more data.
+        if (i + width > total) return f_status_set_error(f_incomplete_utf_on_eof);
+
+        character = f_macro_utf_character_from_char_1(buffer_read[i]);
+
+        // each following byte of the sequence is taken from its own position in the staged data.
+        if (width > 1 && i + 1 < total) {
+          character |= f_macro_utf_character_from_char_2(buffer_read[i + 1]);
+
+          if (width > 2 && i + 2 < total) {
+            character |= f_macro_utf_character_from_char_3(buffer_read[i + 2]);
+
+            if (width > 3 && i + 3 < total) {
+              character |= f_macro_utf_character_from_char_4(buffer_read[i + 3]);
+            }
+          }
+        }
+
+        // append, 'i' is a byte offset into buffer_read and not an index into the caller's buffer.
+        buffer->string[buffer->used] = character;
+        buffer->used++;
+      } // for
+
+      if (feof(file->address)) return f_none_on_eof;
+    } while (infinite);
+
+    return status;
+  }
+#endif // _di_fl_utf_file_read_position_
+
+#ifndef _di_fl_utf_file_write_
+  // Write every character in buffer (0 up to buffer.used) to the open file.
+  // Each f_utf_character is unpacked into its 1 to 4 bytes inside a staging array of f_file_default_write_size bytes.
+  // The staging array is written to the file whenever it is full or the buffer is exhausted.
+  // Returns f_none on success, otherwise a status with the error bit set.
+  f_return_status fl_utf_file_write(f_file *file, const f_utf_string_dynamic buffer) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    f_string_length total = 0;
+    f_string_length used = 0;
+    f_string_length i = 0;
+
+    size_t written = 0;
+    uint8_t buffer_write[f_file_default_write_size];
+    uint8_t width = 0;
+
+    do {
+      // every pass starts with an empty staging array.
+      used = 0;
+      memset(buffer_write, 0, f_file_default_write_size);
+
+      for (i = 0; used < f_file_default_write_size && total + i < buffer.used; i++, used += width) {
+        width = f_macro_utf_character_width(buffer.string[total + i]);
+
+        // a character that does not fit in the remaining space is left for the next pass.
+        // NOTE(review): this assumes f_file_default_write_size is at least 4, otherwise a 4-byte character could never be staged.
+        if (used + width > f_file_default_write_size) break;
+
+        buffer_write[used] = f_macro_utf_character_to_char_1(buffer.string[total + i]);
+
+        if (width > 1) {
+          buffer_write[used + 1] = f_macro_utf_character_to_char_2(buffer.string[total + i]);
+
+          if (width > 2) {
+            buffer_write[used + 2] = f_macro_utf_character_to_char_3(buffer.string[total + i]);
+
+            if (width > 3) {
+              buffer_write[used + 3] = f_macro_utf_character_to_char_4(buffer.string[total + i]);
+            }
+          }
+        }
+      } // for
+
+      // the staging array holds plain bytes, so the item size is 1 and the item count is the number of staged bytes.
+      written = fwrite(buffer_write, 1, used, file->address);
+
+      if (written < used) return f_status_set_error(f_file_error_write);
+
+      total += i;
+    } while (total < buffer.used);
+
+    return f_none;
+  }
+#endif // _di_fl_utf_file_write_
+
+#ifndef _di_fl_utf_file_write_position_
+  // Write the characters of buffer from position.start to position.stop (inclusive) to the open file.
+  // The range is clamped to buffer.used and nothing is written when position.start is at or beyond buffer.used.
+  // Each f_utf_character is unpacked into its 1 to 4 bytes inside a staging array of f_file_default_write_size bytes.
+  // The staging array is written to the file whenever it is full or the range is exhausted.
+  // NOTE(review): treating position as an inclusive range within buffer is inferred from its f_utf_string_location type, confirm the intended meaning.
+  f_return_status fl_utf_file_write_position(f_file *file, const f_utf_string_dynamic buffer, const f_utf_string_location position) {
+    #ifndef _di_level_1_parameter_checking_
+      if (file == 0) return f_status_set_error(f_invalid_parameter);
+      if (position.start > position.stop) return f_status_set_error(f_invalid_parameter);
+    #endif // _di_level_1_parameter_checking_
+
+    if (file->address == 0) return f_status_set_error(f_file_not_open);
+
+    // the range begins outside of the used part of the buffer, so there is nothing to write.
+    if (position.start >= buffer.used) return f_none;
+
+    f_string_length total = 0;
+    f_string_length used = 0;
+    f_string_length i = 0;
+
+    size_t written = 0;
+    uint8_t buffer_write[f_file_default_write_size];
+    uint8_t width = 0;
+
+    // the number of characters to write: (stop - start + 1), but never more than what the buffer holds after start.
+    // the '+ 1' is only applied after the comparison so that it cannot overflow.
+    f_string_length max = position.stop - position.start;
+
+    if (max >= buffer.used - position.start) {
+      max = buffer.used - position.start;
+    }
+    else {
+      max++;
+    }
+
+    do {
+      // every pass starts with an empty staging array.
+      used = 0;
+      memset(buffer_write, 0, f_file_default_write_size);
+
+      for (i = 0; used < f_file_default_write_size && total + i < max; i++, used += width) {
+        width = f_macro_utf_character_width(buffer.string[position.start + total + i]);
+
+        // a character that does not fit in the remaining space is left for the next pass.
+        // NOTE(review): this assumes f_file_default_write_size is at least 4, otherwise a 4-byte character could never be staged.
+        if (used + width > f_file_default_write_size) break;
+
+        buffer_write[used] = f_macro_utf_character_to_char_1(buffer.string[position.start + total + i]);
+
+        if (width > 1) {
+          buffer_write[used + 1] = f_macro_utf_character_to_char_2(buffer.string[position.start + total + i]);
+
+          if (width > 2) {
+            buffer_write[used + 2] = f_macro_utf_character_to_char_3(buffer.string[position.start + total + i]);
+
+            if (width > 3) {
+              buffer_write[used + 3] = f_macro_utf_character_to_char_4(buffer.string[position.start + total + i]);
+            }
+          }
+        }
+      } // for
+
+      // the staging array holds plain bytes, so the item size is 1 and the item count is the number of staged bytes.
+      written = fwrite(buffer_write, 1, used, file->address);
+
+      if (written < used) return f_status_set_error(f_file_error_write);
+
+      total += i;
+    } while (total < max);
+
+    return f_none;
+  }
+#endif // _di_fl_utf_file_write_position_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/level_1/fl_utf_file/c/utf_file.h b/level_1/fl_utf_file/c/utf_file.h
new file mode 100644 (file)
index 0000000..ca97550
--- /dev/null
@@ -0,0 +1,124 @@
+/**
+ * FLL - Level 1
+ *
+ * Project: Utf File
+ * API Version: 0.5
+ * Licenses: lgplv2.1
+ *
+ * File Operations.
+ */
+#ifndef _FL_utf_file_h
+#define _FL_utf_file_h
+
+// libc includes
+#include <string.h>
+
+// fll-0 includes
+#include <level_0/file.h>
+#include <level_0/string.h>
+#include <level_0/type.h>
+#include <level_0/utf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Load entire file into the UTF-8 buffer.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * The file is read in passes of (file.size_block * file.size_chunk) bytes until EOF is reached.
+ *
+ * @param file
+ *   The file to read from.
+ *   The file must already be open (file.address must not be 0).
+ * @param buffer
+ *   The buffer to load the file into.
+ *   When parameter checking is enabled, buffer.used must be less than buffer.size.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eof on success and EOF was reached.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_seek (with error bit) if file seek failed.
+ *   f_file_error_read (with error bit) if file read failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_error_reallocation (with error bit) on memory reallocation error.
+ *   f_string_too_large (with error bit) if string is too large to fit into buffer.
+ *   f_incomplete_utf_on_eof (with error bit) if UTF-8 character was incomplete at the end of the data read in a single pass (such as at the end of the file).
+ */
+#ifndef _di_fl_utf_file_read_
+  extern f_return_status fl_utf_file_read(f_file *file, f_utf_string_dynamic *buffer);
+#endif // _di_fl_utf_file_read_
+
+/**
+ * Load file into the UTF-8 buffer, based on specified positions.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to read from.
+ *   The file must already be open (file.address must not be 0).
+ * @param buffer
+ *   The buffer to save the file.
+ * @param position
+ *   The file position to base reading off of.
+ *   The file is first moved to position.start.
+ *   When position.total is 0, the file is read in passes of (file.size_block * file.size_chunk) bytes until EOF is reached.
+ *   Otherwise a single pass of (position.total * file.size_chunk) bytes is read.
+ *
+ * @return
+ *   f_none on success.
+ *   f_none_on_eof on success and EOF was reached.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_seek (with error bit) if file seek failed.
+ *   f_file_error_read (with error bit) if file read failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid.
+ *   f_error_reallocation (with error bit) on memory reallocation error.
+ *   f_string_too_large (with error bit) if string is too large to fit into buffer.
+ *   f_incomplete_utf_on_eof (with error bit) if UTF-8 character was incomplete at the end of the data read in a single pass (such as at the end of the file).
+ */
+#ifndef _di_fl_utf_file_read_position_
+  extern f_return_status fl_utf_file_read_position(f_file *file, f_utf_string_dynamic *buffer, const f_file_position position);
+#endif // _di_fl_utf_file_read_position_
+
+/**
+ * Save entire UTF-8 buffer into file.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to save to.
+ *   The file must already be open (file.address must not be 0).
+ * @param buffer
+ *   The buffer to save to the file.
+ *   The characters from 0 up to buffer.used are processed.
+ *
+ * @return
+ *   f_none on success.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_write (with error bit) if write failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid (only checked when parameter checking is enabled).
+ */
+#ifndef _di_fl_utf_file_write_
+  extern f_return_status fl_utf_file_write(f_file *file, const f_utf_string_dynamic buffer);
+#endif // _di_fl_utf_file_write_
+
+/**
+ * Save entire UTF-8 buffer into file, based on specified positions.
+ *
+ * This does not validate the UTF-8 codes.
+ *
+ * @param file
+ *   The file to save to.
+ *   The file must already be open (file.address must not be 0).
+ * @param buffer
+ *   The buffer to save to the file.
+ * @param position
+ *   The position to base writing off of.
+ *   The position.start and position.stop values are used to determine what is written.
+ *   NOTE(review): the type is f_utf_string_location, which suggests a location within buffer rather than a position within the file, confirm the intended meaning.
+ *
+ * @return
+ *   f_none on success.
+ *   f_file_not_open (with error bit) if file is not open.
+ *   f_file_error_write (with error bit) if write failed.
+ *   f_invalid_parameter (with error bit) if a parameter is invalid (only checked when parameter checking is enabled).
+ */
+#ifndef _di_fl_utf_file_write_position_
+  extern f_return_status fl_utf_file_write_position(f_file *file, const f_utf_string_dynamic buffer, const f_utf_string_location position);
+#endif // _di_fl_utf_file_write_position_
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // _FL_utf_file_h
diff --git a/level_1/fl_utf_file/data/build/dependencies b/level_1/fl_utf_file/data/build/dependencies
new file mode 100644 (file)
index 0000000..9d424c7
--- /dev/null
@@ -0,0 +1,6 @@
+f_type
+f_status
+f_memory
+f_string
+f_file
+f_utf
diff --git a/level_1/fl_utf_file/data/build/settings b/level_1/fl_utf_file/data/build/settings
new file mode 100644 (file)
index 0000000..e0cb76d
--- /dev/null
@@ -0,0 +1,30 @@
+# fss-0000
+
+project_name fl_utf_file
+project_level 1
+
+version_major 0
+version_minor 5
+version_micro 0
+
+build_compiler gcc
+build_linker ar
+build_libraries -lc
+build_libraries_fll -lf_file -lf_utf -lf_memory
+build_sources_library utf_file.c
+build_sources_program 
+build_sources_headers utf_file.h
+build_sources_bash
+build_sources_settings
+build_shared yes
+build_static yes
+
+defines_all
+defines_static
+defines_shared
+
+flags_all -z now -g
+flags_shared
+flags_static
+flags_library -fPIC
+flags_program -fPIE