From a6e250e279226f40285de1cdd668bbf380fc65e4 Mon Sep 17 00:00:00 2001 From: Kevin Day Date: Wed, 27 Jul 2022 19:09:16 -0500 Subject: [PATCH] Update: Clarify IKI standard in regards to delimits and improve specification file. Add additional commands to the specification iki_read example. Expand the vocabulary to include more words such as "character". This allows for more granular control over substitution. This is a must to easily convert to both plain text and HTML from the same source file. Fix incorrect example where the delimits are on the wrong characters (url:\' is not correct and instead should be url\:'). Add delimits to accidentally undelimited code. These changes exposed a mistake in both the processing code and a detail in how I explained the specification. The IKI processing only needs a delimit when the vocabulary name is followed by a colon that is in turn followed by either a single or a double quote. That is to say, the matching closing single or double quote is irrelevant. This simplifies the logic needed to both read and write the IKI content for both machines and humans. Note that this is in contrast to how the FSS works. IKI is designed for different purposes than FSS and thus has slightly different design paradigms. The code processing this is incorrectly failing to apply delimits for any potentially valid IKI content inside an already valid but delimited IKI variable. Fixing this conveniently also results in simpler code and fewer variables used (saving trivial memory). 
--- level_0/f_iki/c/iki.c | 80 ++++++++++++++++---------------------------------- specifications/iki.txt | 32 ++++++++++---------- 2 files changed, 42 insertions(+), 70 deletions(-) diff --git a/level_0/f_iki/c/iki.c b/level_0/f_iki/c/iki.c index 0b0a7e0..e9ca32b 100644 --- a/level_0/f_iki/c/iki.c +++ b/level_0/f_iki/c/iki.c @@ -94,11 +94,10 @@ extern "C" { f_string_range_t found_vocabulary = f_string_range_t_initialize; f_array_length_t found_content = 0; - f_array_length_t vocabulary_slash_first = range->start; + f_array_length_t vocabulary_slash_first = 0; const f_array_length_t delimits_used = data->delimits.used; uint8_t quote = 0; - bool vocabulary_delimited = F_false; do { @@ -133,7 +132,6 @@ extern "C" { if (status == F_true) { found_vocabulary.start = range->start; found_vocabulary.stop = range->start; - vocabulary_delimited = F_false; status = f_utf_buffer_increment(*buffer, range, 1); if (F_status_is_error(status)) break; @@ -210,15 +208,19 @@ extern "C" { } if (separator_found) { + + // Save delimit for a would-be valid IKI that is now delimited. if (buffer->string[range->start] == f_iki_syntax_quote_single_s.string[0] || buffer->string[range->start] == f_iki_syntax_quote_double_s.string[0]) { - status = F_true; - vocabulary_delimited = F_true; - quote = buffer->string[range->start]; - } - else { - status = F_next; + status = f_array_lengths_increase(state.step_small, &data->delimits); + if (F_status_is_error(status)) break; + + data->delimits.array[data->delimits.used++] = vocabulary_slash_first; + + ++range->start; } + status = F_next; + break; } @@ -277,7 +279,6 @@ extern "C" { if (status == F_next) { quote = 0; - vocabulary_delimited = F_false; continue; } @@ -305,20 +306,6 @@ extern "C" { } if (buffer->string[range->start] == quote) { - - // This is a valid vocabulary name and content, but if it is delimited, save the delimit and ignore. 
- if (vocabulary_delimited) { - status = f_array_lengths_increase(state.step_small, &data->delimits); - if (F_status_is_error(status)) break; - - data->delimits.array[data->delimits.used++] = vocabulary_slash_first; - - vocabulary_delimited = F_false; - quote = 0; - - break; - } - status = f_string_ranges_increase(state.step_small, &data->variable); if (F_status_is_error(status)) break; @@ -385,43 +372,28 @@ extern "C" { // Valid content's ending quote is not delimited, save and return. if (content_slash_total % 2 == 0) { + status = f_string_ranges_increase(state.step_small, &data->variable); + if (F_status_is_error(status)) break; - // This is a valid vocabulary name and content, but if it is delimited, save the delimit and ignore. - if (vocabulary_delimited) { - status = f_array_lengths_increase(state.step_small, &data->delimits); - if (F_status_is_error(status)) break; - - data->delimits.array[data->delimits.used++] = vocabulary_slash_first; - - vocabulary_delimited = F_false; - quote = 0; - - ++range->start; - } - else { - status = f_string_ranges_increase(state.step_small, &data->variable); - if (F_status_is_error(status)) break; - - status = f_string_ranges_increase(state.step_small, &data->vocabulary); - if (F_status_is_error(status)) break; + status = f_string_ranges_increase(state.step_small, &data->vocabulary); + if (F_status_is_error(status)) break; - status = f_string_ranges_increase(state.step_small, &data->content); - if (F_status_is_error(status)) break; + status = f_string_ranges_increase(state.step_small, &data->content); + if (F_status_is_error(status)) break; - data->variable.array[data->variable.used].start = found_vocabulary.start; - data->variable.array[data->variable.used++].stop = range->start; + data->variable.array[data->variable.used].start = found_vocabulary.start; + data->variable.array[data->variable.used++].stop = range->start; - data->vocabulary.array[data->vocabulary.used].start = found_vocabulary.start; - 
data->vocabulary.array[data->vocabulary.used++].stop = found_vocabulary.stop; + data->vocabulary.array[data->vocabulary.used].start = found_vocabulary.start; + data->vocabulary.array[data->vocabulary.used++].stop = found_vocabulary.stop; - data->content.array[data->content.used].start = found_content; - data->content.array[data->content.used++].stop = range->start - 1; + data->content.array[data->content.used].start = found_content; + data->content.array[data->content.used++].stop = range->start - 1; - if (++range->start > range->stop) return F_none_stop; - if (range->start >= buffer->used) return F_none_eos; + if (++range->start > range->stop) return F_none_stop; + if (range->start >= buffer->used) return F_none_eos; - return F_none; - } + return F_none; } break; diff --git a/specifications/iki.txt b/specifications/iki.txt index 22d579c..c734781 100644 --- a/specifications/iki.txt +++ b/specifications/iki.txt @@ -3,10 +3,10 @@ # license: open-standard-license-1.0 # # This file (assumed to be named iki.txt) can be more easily read using the following iki_read commands: -# iki_read iki.txt -w +# iki_read iki.txt +Q -w -WW character '"' '"' code '"' '"' # # To read the "IKI Specifications" section of this file, use this command sequence: -# fss_basic_list_read iki.txt -cn "IKI Specifications" | iki_read -w +# fss_basic_list_read iki.txt -cn "IKI Specifications" | iki_read +Q -w -WW character '"' '"' code '"' '"' # IKI Specifications: @@ -20,22 +20,22 @@ IKI Specifications: The IKI format will use code:"iki-0000" to represent an IKI with no explicitly defined vocabulary. Whereas code:"iki-0001" and beyond represent a specific IKI vocabulary. - A potential IKI variable name starts on word (or code:"_", code:"-", code:"+") characters. - White space and non-word (and non code:"_", code:"-", code:"+") character punctuations may not exist as part of the variable name. 
- The only Unicode dash-like characters allowed as a "dash" are those intended to connect, such as the Unicode hyphens (code:"U+2010" and code:"U+2011"). + A potential IKI variable name starts on word (or character:"_", character:"-", character:"+") characters. + White space and non-word (and non character:"_", character:"-", character:"+") character punctuations may not exist as part of the variable name. + The only Unicode dash-like characters allowed as a "dash" are those intended to connect, such as the Unicode hyphens (unicode:"U+2010" and unicode:"U+2011"). - Any valid IKI data may be escaped to make it treated as non-IKI data by prepending a backslash code:"\" before the colon code:":" that is before the opening quote (single or double). + Any potential IKI data must be escaped to make it treated as non-IKI data by prepending a backslash character:"\\" before the colon character:":" that is before the opening quote (single or double). Potential IKI data refers to any valid IKI sequence without considering the closing single quote character:"'" or closing double quote character:'"'. - Unicode punctuation connector characters are supported just like code:"_", except when they connect outside the current line (such as code:"U+FE33" code:"︳"). - Unicode invisible punctuations (such as invisible plus: code:"U+2064") are not considered a punctuations in this standard (because they a zero-width characters), therefore they are not to be considered a valid code:"_", code:"-", or code:"+" Unicode equivalents. + Unicode punctuation connector characters are supported just like character:"_", except when they connect outside the current line (such as unicode:"U+FE33" character:"︳"). + Unicode invisible punctuations (such as invisible plus: unicode:"U+2064") are not considered punctuation in this standard (because they are zero-width characters), therefore they are not to be considered valid character:"_", character:"-", or character:"+" Unicode equivalents. 
Key\: - code:"\o" = any printable word character, including code:"_", code:"-", code:"+" (and Unicode equivalents). + code:"\o" = any printable word character, including character:"_", character:"-", character:"+" (and Unicode equivalents). code:"\c" = any character, including white space and non-printing, and any delimited quote (used as the opening quote) or a any quote (undelimited) not used as the opening quote. - code:"\q" = either a single quote code:"'" or a double quote code:'"'. + code:"\q" = either a single quote character:"'" or a double quote character:'"'. code:"\x" = any character. - code:"\W" = any non-word character, discluding code:"_", code:"-", code:"+" (and Unicode equivalents). - code:"\e" = an optional escape sequence of any number of backslashes, such as code:"\\". + code:"\W" = any non-word character, discluding character:"_", character:"-", character:"+" (and Unicode equivalents). + code:"\e" = an optional escape sequence of any number of backslashes, such as character:"\\". code:"*" = zero or more occurrences. code:"~" = one or more occurrences, or zero if at start of file. @@ -49,15 +49,15 @@ IKI Specifications: code:"" Example\: - code:'# fss-000c iki-0000 + code\:'# fss-000c iki-0000 - This is my sentence, anything can go here but sometimes I want to emphasis:"emphasize some text". + This is my sentence, anything can go here but sometimes I want to emphasis\:"emphasize some text". - Other times I want to render a url such as this example url: url:\'http://www.example.com/url with space/\'. + Other times I want to render a url such as this example url: url\:'http://www.example.com/url with space/'. There are no comments, except for maybe the FSS header (which would not resolve to any IKI syntax anyway). - Quotes may be included, such as: code:"const char *string = \"My \\\"quoted\\\" C string.\";". + Quotes may be included, such as: code\:"const char *string = \"My \\\"quoted\\\" C string.\";". 
The following emphasis\:"is escaped to not be treated as IKI data".' -- 1.8.3.1