summaryrefslogtreecommitdiff
path: root/source/kit/xml.c
diff options
context:
space:
mode:
authorMitya Selivanov <automainint@guattari.tech>2024-02-11 18:17:33 +0100
committerMitya Selivanov <automainint@guattari.tech>2024-02-11 18:17:33 +0100
commitdf00df5a7a5bcd9076d4423128ea014ab3535626 (patch)
tree337e62f8ca39b19b250b155a3fbeb495384e356b /source/kit/xml.c
parent80da54bb97c279aa60fb77a9bbad9baa0f2e4477 (diff)
downloadsaw-df00df5a7a5bcd9076d4423128ea014ab3535626.zip
Update kit
Diffstat (limited to 'source/kit/xml.c')
-rw-r--r--source/kit/xml.c431
1 files changed, 237 insertions, 194 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c
index c62ee82..8d22bf3 100644
--- a/source/kit/xml.c
+++ b/source/kit/xml.c
@@ -4,198 +4,241 @@
#include <assert.h>
typedef struct {
- ib_t last;
- kit_str_builder_t text;
- kit_da_xml_t tags;
+ ib_token_t last;
+ str_builder_t text;
+ kit_da_xml_t tags;
} kit_xml_intermediate_t;
-static kit_status_t kit_xml_unescape_(str_builder_t *str) {
- assert(str != NULL);
+static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str,
+ kit_allocator_t *alloc) {
+ assert(dst != NULL);
+ assert(str.size == 0 || str.values != NULL);
- str_builder_t buf;
- DA_INIT(buf, str->size, str->alloc);
- buf.size = 0;
+ if (dst == NULL)
+ return KIT_ERROR_INTERNAL;
+ if (str.size != 0 && str.values == NULL)
+ return KIT_ERROR_INTERNAL;
- for (i64 i = 0; i < str->size; i++)
- if (str->values[i] != '&')
- buf.values[buf.size++] = str->values[i];
+ DA_INIT(*dst, str.size, alloc);
+
+ if (dst->size != str.size)
+ return KIT_ERROR_BAD_ALLOC;
+
+ dst->size = 0;
+
+ for (i64 i = 0; i < str.size; i++)
+ if (str.values[i] != '&')
+ dst->values[dst->size++] = str.values[i];
else {
i64 n = 1;
- while (i + n < str->size && str->values[i + n] != ';') n++;
- if (i + n >= str->size) {
- DA_DESTROY(buf);
- return KIT_ERROR_INTERNAL;
+ while (i + n < str.size && str.values[i + n] != ';') n++;
+ if (i + n >= str.size) {
+ DA_DESTROY(*dst);
+ return KIT_PARSING_FAILED;
}
- if (n == 3 && memcmp(str->values + i, "&lt;", 4) == 0)
- buf.values[buf.size++] = '<';
- else if (n == 3 && memcmp(str->values + i, "&gt;", 4) == 0)
- buf.values[buf.size++] = '>';
- else if (n == 4 && memcmp(str->values + i, "&amp;", 5) == 0)
- buf.values[buf.size++] = '&';
- else if (n == 5 && memcmp(str->values + i, "&quot;", 6) == 0)
- buf.values[buf.size++] = '"';
- else if (n == 5 && memcmp(str->values + i, "&apos;", 6) == 0)
- buf.values[buf.size++] = '\'';
- else {
- DA_DESTROY(buf);
- return KIT_ERROR_INTERNAL;
+ if (n == 3 && memcmp(str.values + i, "&lt;", 4) == 0)
+ dst->values[dst->size++] = '<';
+ else if (n == 3 && memcmp(str.values + i, "&gt;", 4) == 0)
+ dst->values[dst->size++] = '>';
+ else if (n == 4 && memcmp(str.values + i, "&amp;", 5) == 0)
+ dst->values[dst->size++] = '&';
+ else if (n == 5 && memcmp(str.values + i, "&quot;", 6) == 0)
+ dst->values[dst->size++] = '"';
+ else if (n == 5 && memcmp(str.values + i, "&apos;", 6) == 0)
+ dst->values[dst->size++] = '\'';
+ else if (n - 2 <= 8 && str.values[i + 1] == '#' &&
+ str.values[i + 2] == 'x') {
+ // hex encoding
+ //
+
+ c8 buf[8];
+ u64 x = 0;
+
+ memcpy(buf, str.values + (i + 3), n - 2);
+
+ for (i64 k = 0; k < n - 2; k++) {
+ c8 c = str.values[i + 3 + k];
+ x <<= 8;
+ if (c >= '0' && c <= '9')
+ x |= (c - '0');
+ else if (c >= 'a' && c <= 'f')
+ x |= 10 + (c - 'a');
+ else if (c >= 'A' && c <= 'F')
+ x |= 10 + (c - 'A');
+ else {
+ x = 0;
+ break;
+ }
+ }
+
+ if (x == 0 || x > 255u) {
+ // TODO
+ // UTF-8 encoding
+
+ DA_DESTROY(*dst);
+ return KIT_PARSING_FAILED;
+ }
+
+ dst->values[dst->size++] = (c8) x;
+ } else if (n - 1 <= 20 && str.values[i + 1] == '#') {
+ // dec encoding
+ //
+
+ c8 buf[20];
+ u64 x = 0;
+
+ memcpy(buf, str.values + (i + 2), n - 2);
+
+ for (i64 k = 0; k < n - 1; k++) {
+ c8 c = str.values[i + 2 + k];
+ x *= 10;
+ if (c >= '0' && c <= '9')
+ x += (c - '0');
+ else {
+ x = 0;
+ break;
+ }
+ }
+
+ if (x == 0 || x > 255u) {
+ // TODO
+ // UTF-8 encoding
+
+ DA_DESTROY(*dst);
+ return KIT_PARSING_FAILED;
+ }
+
+ dst->values[dst->size++] = (c8) x;
+ } else {
+ DA_DESTROY(*dst);
+ return KIT_PARSING_FAILED;
}
i += n;
}
- DA_DESTROY(*str);
- *str = buf;
-
return KIT_OK;
}
-static ib_t kit_xml_parse_text_(ib_t begin) {
- ib_t text = ib_until(begin, SZ("<"));
- ib_t last = ib_copy(text);
+static ib_token_t kit_xml_parse_text_(ib_token_t begin,
+ str_builder_t *dst) {
+ ib_token_t last = ib_until(begin, SZ("<"));
+
+ DA_RESIZE(*dst, last.size);
+
+ assert(dst->size == last.size);
+ if (dst->size != last.size)
+ last.status |= KIT_ERROR_BAD_ALLOC;
+ else if (last.size > 0)
+ memcpy(dst->values, ib_str(last).values, last.size);
for (;;) {
- ib_t comment_open = ib_exact(last, SZ("<!--"));
+ ib_token_t comment_open = ib_exact(last, SZ("<!--"));
- if (comment_open.status != KIT_OK) {
- ib_destroy(comment_open);
+ if (comment_open.status != KIT_OK)
break;
- }
- ib_t comment_text = ib_until(comment_open, SZ("-->"));
- ib_t comment_close = ib_exact(comment_text, SZ("-->"));
- ib_t next_text = ib_until(comment_close, SZ("<"));
+ ib_token_t comment_text = ib_until(comment_open, SZ("-->"));
+ ib_token_t comment_close = ib_exact(comment_text, SZ("-->"));
+ ib_token_t next_text = ib_until(comment_close, SZ("<"));
- if (next_text.status == KIT_OK && next_text.data.size > 0) {
- i64 n = text.data.size;
- DA_RESIZE(text.data, n + next_text.data.size);
+ if (next_text.status == KIT_OK && next_text.size > 0) {
+ i64 n = dst->size;
+ DA_RESIZE(*dst, n + next_text.size);
- assert(text.data.size == n + next_text.data.size);
- if (text.data.size != n + next_text.data.size)
- next_text.status = KIT_ERROR_BAD_ALLOC;
+ assert(dst->size == n + next_text.size);
+ if (dst->size != n + next_text.size)
+ next_text.status |= KIT_ERROR_BAD_ALLOC;
else
- memcpy(text.data.values + n, next_text.data.values,
- next_text.data.size);
+ memcpy(dst->values + n, ib_str(next_text).values,
+ ib_str(next_text).size);
}
- ib_destroy(last);
- last = ib_copy(next_text);
-
- ib_destroy(comment_open);
- ib_destroy(comment_text);
- ib_destroy(comment_close);
- ib_destroy(next_text);
+ last = next_text;
}
- // move
- DA_DESTROY(last.data);
- last.data = text.data;
- memset(&text.data, 0, sizeof text.data);
-
- kit_status_t s = kit_xml_unescape_(&last.data);
- if (s != KIT_OK)
- last.status = s;
-
- ib_destroy(text);
-
return last;
}
-static ib_t kit_xml_parse_string_(ib_t begin) {
- ib_t quotes_open = ib_exact(begin, SZ("\""));
- ib_t apostr_open = ib_exact(begin, SZ("'"));
-
- ib_t open = quotes_open.status == KIT_OK ? quotes_open
- : apostr_open;
-
- ib_t text = ib_until(open, WRAP_STR(open.data));
- ib_t close = ib_exact(text, WRAP_STR(open.data));
+static ib_token_t kit_xml_parse_string_(ib_token_t begin,
+ ib_token_t *value) {
+ assert(value != NULL);
+ if (value == NULL) {
+ begin.status |= KIT_ERROR_INTERNAL;
+ return begin;
+ }
- // move
- DA_DESTROY(close.data);
- close.data = text.data;
- memset(&text.data, 0, sizeof text.data);
+ ib_token_t quotes_open = ib_exact(begin, SZ("\""));
+ ib_token_t apostr_open = ib_exact(begin, SZ("'"));
- kit_status_t s = kit_xml_unescape_(&close.data);
- if (s == KIT_OK)
- close.status = s;
+ ib_token_t open = quotes_open.status == KIT_OK ? quotes_open
+ : apostr_open;
- ib_destroy(quotes_open);
- ib_destroy(apostr_open);
- ib_destroy(text);
+ *value = ib_until(open, ib_str(open));
+ ib_token_t close = ib_exact(*value, ib_str(open));
return close;
}
static kit_xml_intermediate_t kit_xml_parse_buf_(
- ib_t begin, kit_allocator_t *alloc) {
+ ib_token_t begin, kit_allocator_t *alloc) {
kit_xml_intermediate_t res;
memset(&res, 0, sizeof res);
- ib_t last, spaces;
+ ib_token_t last, spaces;
memset(&last, 0, sizeof last);
memset(&spaces, 0, sizeof spaces);
- ib_t tag_text = kit_xml_parse_text_(begin);
- last = ib_copy(tag_text);
+ str_builder_t tag_text_string;
+ str_builder_t tag_tail_string;
+ DA_INIT(tag_text_string, 0, alloc);
+ DA_INIT(tag_tail_string, 0, alloc);
+
+ ib_token_t tag_text = kit_xml_parse_text_(begin, &tag_text_string);
+ last = tag_text;
DA_INIT(res.tags, 0, alloc);
for (;;) {
- ib_t tagend_open = ib_exact(last, SZ("</"));
- ib_destroy(tagend_open);
+ ib_token_t tagend_open = ib_exact(last, SZ("</"));
if (tagend_open.status == KIT_OK)
break;
- ib_t tag_open = ib_exact(last, SZ("<"));
+ ib_token_t tag_open = ib_exact(last, SZ("<"));
- if (tag_open.status != KIT_OK) {
- ib_destroy(tag_open);
+ if (tag_open.status != KIT_OK)
break;
- }
xml_t tag;
memset(&tag, 0, sizeof tag);
- ib_t decl_open = ib_exact(tag_open, SZ("?"));
-
- ib_destroy(last);
+ ib_token_t decl_open = ib_exact(tag_open, SZ("?"));
if (decl_open.status == KIT_OK) {
tag.is_declaration = 1;
- last = ib_copy(decl_open);
+ last = decl_open;
} else
- last = ib_copy(tag_open);
-
- ib_destroy(decl_open);
+ last = tag_open;
- spaces = ib_any(last, SZ(" \t\r\n"));
- ib_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));
- ib_destroy(spaces);
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_token_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));
DA_INIT(tag.properties, 0, alloc);
- ib_destroy(last);
- last = ib_copy(tag_name);
+ last = tag_name;
for (;;) {
- spaces = ib_any(last, SZ(" \t\r\n"));
- ib_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));
- ib_destroy(spaces);
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_token_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));
- if (property.status != KIT_OK || property.data.size == 0) {
- ib_destroy(property);
+ if (property.status != KIT_OK || property.size == 0)
break;
- }
- spaces = ib_any(property, SZ(" \t\r\n"));
- ib_t equals = ib_exact(spaces, SZ("="));
- ib_destroy(spaces);
- spaces = ib_any(equals, SZ(" \t\r\n"));
- ib_t value = kit_xml_parse_string_(spaces);
- ib_destroy(spaces);
+ spaces = ib_any(property, SZ(" \t\r\n"));
+ ib_token_t equals = ib_exact(spaces, SZ("="));
+ spaces = ib_any(equals, SZ(" \t\r\n"));
- ib_destroy(last);
- last = ib_copy(value);
+ ib_token_t value;
+ last = kit_xml_parse_string_(spaces, &value);
if (last.status == KIT_OK) {
i64 n = tag.properties.size;
@@ -203,77 +246,56 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
assert(tag.properties.size == n + 1);
if (tag.properties.size != n + 1) {
- last.status = KIT_ERROR_BAD_ALLOC;
+ last.status |= KIT_ERROR_BAD_ALLOC;
DA_DESTROY(tag.properties);
} else {
- // move
- tag.properties.values[n].name = property.data;
- memset(&property.data, 0, sizeof property.data);
-
- // move
- tag.properties.values[n].value = value.data;
- memset(&value.data, 0, sizeof value.data);
+ last.status |= kit_xml_alloc_and_unescape_(
+ &tag.properties.values[n].name, ib_str(property),
+ alloc);
+ last.status |= kit_xml_alloc_and_unescape_(
+ &tag.properties.values[n].value, ib_str(value), alloc);
}
}
-
- ib_destroy(property);
- ib_destroy(equals);
- ib_destroy(value);
}
- spaces = ib_any(last, SZ(" \t\r\n"));
-
if (tag.is_declaration) {
- ib_t tag_decl_close = ib_exact(spaces, SZ("?>"));
- ib_destroy(spaces);
+ ib_token_t tag_decl_close = ib_exact(spaces, SZ("?>"));
- ib_destroy(last);
last = tag_decl_close;
DA_INIT(tag.text, 0, alloc);
DA_INIT(tag.children, 0, alloc);
} else {
- ib_t tag_close = ib_exact(spaces, SZ(">"));
- ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));
- ib_destroy(spaces);
+ ib_token_t tag_close = ib_exact(spaces, SZ(">"));
+ ib_token_t tag_close_empty = ib_exact(spaces, SZ("/>"));
if (tag_close.status == KIT_OK) {
kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,
alloc);
- tag.text = im.text;
- tag.children = im.tags;
-
- tagend_open = ib_exact(im.last, SZ("</"));
- ib_destroy(im.last);
- spaces = ib_any(tagend_open, SZ(" \t\r\n"));
- ib_t tagend_name = ib_exact(spaces, WRAP_STR(tag_name.data));
- ib_destroy(spaces);
- spaces = ib_any(tagend_name, SZ(" \t\r\n"));
- ib_t tagend_close = ib_exact(spaces, SZ(">"));
- ib_destroy(spaces);
- ib_destroy(tagend_open);
- ib_destroy(tagend_name);
-
- ib_destroy(last);
+
+ tag.text = im.text;
+ tag.children = im.tags;
+
+ tagend_open = ib_exact(im.last, SZ("</"));
+ spaces = ib_any(tagend_open, SZ(" \t\r\n"));
+ ib_token_t tagend_name = ib_exact(spaces, ib_str(tag_name));
+ spaces = ib_any(tagend_name, SZ(" \t\r\n"));
+ ib_token_t tagend_close = ib_exact(spaces, SZ(">"));
+
last = tagend_close;
} else if (tag_close_empty.status == KIT_OK) {
- ib_destroy(last);
- last = ib_copy(tag_close_empty);
+ last = tag_close_empty;
DA_INIT(tag.text, 0, alloc);
DA_INIT(tag.children, 0, alloc);
} else
- last.status = KIT_ERROR_INTERNAL;
-
- ib_destroy(tag_close);
- ib_destroy(tag_close_empty);
+ last.status |= KIT_PARSING_FAILED;
}
- ib_t tag_tail = kit_xml_parse_text_(last);
+ ib_token_t tag_tail = kit_xml_parse_text_(last, &tag_tail_string);
- ib_destroy(last);
- last = ib_copy(tag_tail);
+ last = tag_tail;
if (last.status == KIT_OK) {
i64 n = res.tags.size;
@@ -281,25 +303,18 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
assert(res.tags.size == n + 1);
if (res.tags.size != n + 1) {
- last.status = KIT_ERROR_BAD_ALLOC;
+ last.status |= KIT_ERROR_BAD_ALLOC;
xml_destroy(&tag);
} else {
- // move
- tag.tag = tag_name.data;
- memset(&tag_name.data, 0, sizeof tag_name.data);
-
- // move
- tag.tail = tag_tail.data;
- memset(&tag_tail.data, 0, sizeof tag_tail.data);
+ last.status |= kit_xml_alloc_and_unescape_(
+ &tag.tag, ib_str(tag_name), alloc);
+ last.status |= kit_xml_alloc_and_unescape_(
+ &tag.tail, WRAP_STR(tag_tail_string), alloc);
res.tags.values[n] = tag;
}
} else
xml_destroy(&tag);
-
- ib_destroy(tag_open);
- ib_destroy(tag_name);
- ib_destroy(tag_tail);
}
if (last.status != KIT_OK) {
@@ -307,37 +322,38 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
xml_destroy(res.tags.values + i);
DA_DESTROY(res.text);
DA_DESTROY(res.tags);
- } else {
- // move
- res.text = tag_text.data;
- memset(&tag_text.data, 0, sizeof tag_text.data);
- }
+ } else
+ last.status |= kit_xml_alloc_and_unescape_(
+ &res.text, WRAP_STR(tag_text_string), alloc);
- ib_destroy(tag_text);
+ DA_DESTROY(tag_text_string);
+ DA_DESTROY(tag_tail_string);
res.last = last;
return res;
}
-kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
+kit_xml_parse_result_t kit_xml_parse(is_handle_t is,
kit_allocator_t *alloc) {
- ib_t ib = ib_wrap(is, alloc);
- kit_xml_intermediate_t im = kit_xml_parse_buf_(ib, alloc);
- ib_destroy(ib);
+ input_buffer_t ib = ib_wrap(is, alloc);
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_token(&ib),
+ alloc);
kit_xml_parse_result_t res;
memset(&res, 0, sizeof res);
res.status = im.last.status;
- ib_destroy(im.last);
- if (res.status != KIT_OK)
+ if (res.status != KIT_OK) {
+ ib_destroy(&ib);
return res;
+ }
if (im.text.size == 0 && im.tags.size == 1) {
res.xml = im.tags.values[0];
DA_DESTROY(im.text);
DA_DESTROY(im.tags);
+ ib_destroy(&ib);
return res;
}
@@ -348,10 +364,14 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
res.xml.text = im.text;
res.xml.children = im.tags;
+ ib_destroy(&ib);
return res;
}
kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
+ // TODO
+ //
+
assert(xml != NULL);
xml_text_t result;
@@ -361,8 +381,7 @@ kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
return result;
}
-static kit_status_t kit_xml_append_text_(str_builder_t *buf,
- xml_t *xml) {
+static s32 kit_xml_append_text_(str_builder_t *buf, xml_t *xml) {
assert(buf != NULL);
assert(xml != NULL);
@@ -377,8 +396,7 @@ static kit_status_t kit_xml_append_text_(str_builder_t *buf,
memcpy(buf->values + n, xml->text.values, xml->text.size);
for (i64 i = 0; i < xml->children.size; i++) {
- kit_status_t s = kit_xml_append_text_(buf,
- xml->children.values + i);
+ s32 s = kit_xml_append_text_(buf, xml->children.values + i);
if (s != KIT_OK)
return s;
@@ -415,6 +433,31 @@ kit_xml_text_t kit_xml_full_text(kit_xml_t *xml,
return res;
}
+b8 kit_xml_has_property(kit_xml_t *xml, kit_str_t name) {
+ assert(xml != NULL);
+ if (xml == NULL)
+ return 0;
+
+ for (i64 i = 0; i < xml->properties.size; i++)
+ if (AR_EQUAL(xml->properties.values[i].name, name))
+ return 1;
+
+ return 0;
+}
+
+str_t kit_xml_property(kit_xml_t *xml, str_t name) {
+ assert(xml != NULL);
+ if (xml == NULL)
+ return str(0, NULL);
+
+ for (i64 i = 0; i < xml->properties.size; i++)
+ if (AR_EQUAL(xml->properties.values[i].name, name))
+ return WRAP_STR(xml->properties.values[i].value);
+
+ assert(0);
+ return str(0, NULL);
+}
+
void kit_xml_destroy(kit_xml_t *xml) {
assert(xml != NULL);
if (xml == NULL)