summary refs log tree commit diff
path: root/source/kit/xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/kit/xml.c')
-rw-r--r-- source/kit/xml.c 437
1 files changed, 437 insertions, 0 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c
new file mode 100644
index 0000000..c62ee82
--- /dev/null
+++ b/source/kit/xml.c
@@ -0,0 +1,437 @@
+#include "xml.h"
+
+#include "input_buffer.h"
+#include <assert.h>
+
+typedef struct {  // result of kit_xml_parse_buf_ for one nesting level
+ ib_t last;  // input buffer state where parsing stopped; its status is the parse status
+ kit_str_builder_t text;  // text found before the first tag of this level
+ kit_da_xml_t tags;  // tags parsed at this level, in the order they were read
+} kit_xml_intermediate_t;
+
+// Decode the five predefined XML entities (&lt; &gt; &amp; &quot;
+// &apos;) stored in `str`.
+//
+// On success the storage of `str` is replaced by the decoded text and
+// KIT_OK is returned. On failure `str` is left untouched:
+//   KIT_ERROR_BAD_ALLOC - the scratch buffer could not be allocated;
+//   KIT_ERROR_INTERNAL  - an '&' has no terminating ';', or the entity
+//                         is not one of the five listed above.
+static kit_status_t kit_xml_unescape_(str_builder_t *str) {
+  assert(str != NULL);
+
+  // Decoding never makes the text longer, so a scratch buffer of the
+  // source size is always large enough.
+  str_builder_t buf;
+  DA_INIT(buf, str->size, str->alloc);
+
+  // Never write through a buffer that failed to allocate. Nothing was
+  // allocated in that case, so there is nothing to release.
+  if (str->size > 0 && buf.values == NULL)
+    return KIT_ERROR_BAD_ALLOC;
+
+  buf.size = 0;
+
+  for (i64 i = 0; i < str->size; i++) {
+    if (str->values[i] != '&') {
+      buf.values[buf.size++] = str->values[i];
+      continue;
+    }
+
+    // `n` is the distance from the '&' to its terminating ';'.
+    i64 n = 1;
+    while (i + n < str->size && str->values[i + n] != ';') n++;
+
+    if (i + n >= str->size) {
+      // Unterminated entity.
+      DA_DESTROY(buf);
+      return KIT_ERROR_INTERNAL;
+    }
+
+    if (n == 3 && memcmp(str->values + i, "&lt;", 4) == 0) {
+      buf.values[buf.size++] = '<';
+    } else if (n == 3 && memcmp(str->values + i, "&gt;", 4) == 0) {
+      buf.values[buf.size++] = '>';
+    } else if (n == 4 && memcmp(str->values + i, "&amp;", 5) == 0) {
+      buf.values[buf.size++] = '&';
+    } else if (n == 5 && memcmp(str->values + i, "&quot;", 6) == 0) {
+      buf.values[buf.size++] = '"';
+    } else if (n == 5 && memcmp(str->values + i, "&apos;", 6) == 0) {
+      buf.values[buf.size++] = '\'';
+    } else {
+      // Unknown entity.
+      DA_DESTROY(buf);
+      return KIT_ERROR_INTERNAL;
+    }
+
+    // Skip the rest of the entity; the loop increment steps past ';'.
+    i += n;
+  }
+
+  DA_DESTROY(*str);
+  *str = buf;
+
+  return KIT_OK;
+}
+
+// Read character data starting at `begin`.
+//
+// The text is collected with ib_until(..., "<"). Whenever the input
+// continues with a "<!--" ... "-->" comment, the comment is consumed
+// and the text fragment that follows it is appended to what was
+// collected so far.
+//
+// The returned ib_t is a copy of the last fragment read, with its data
+// replaced by the joined text after entity decoding. A failed
+// allocation or a failed decoding is reported through its status.
+static ib_t kit_xml_parse_text_(ib_t begin) {
+  ib_t joined = ib_until(begin, SZ("<"));
+  ib_t cursor = ib_copy(joined);
+
+  ib_t comment_open = ib_exact(cursor, SZ("<!--"));
+
+  while (comment_open.status == KIT_OK) {
+    ib_t comment_body  = ib_until(comment_open, SZ("-->"));
+    ib_t comment_close = ib_exact(comment_body, SZ("-->"));
+    ib_t fragment      = ib_until(comment_close, SZ("<"));
+
+    if (fragment.status == KIT_OK && fragment.data.size > 0) {
+      i64 offset = joined.data.size;
+      DA_RESIZE(joined.data, offset + fragment.data.size);
+
+      assert(joined.data.size == offset + fragment.data.size);
+      if (joined.data.size == offset + fragment.data.size)
+        memcpy(joined.data.values + offset, fragment.data.values,
+               fragment.data.size);
+      else
+        fragment.status = KIT_ERROR_BAD_ALLOC;
+    }
+
+    // Continue from the fragment; this also carries its status.
+    ib_destroy(cursor);
+    cursor = ib_copy(fragment);
+
+    ib_destroy(comment_open);
+    ib_destroy(comment_body);
+    ib_destroy(comment_close);
+    ib_destroy(fragment);
+
+    comment_open = ib_exact(cursor, SZ("<!--"));
+  }
+
+  ib_destroy(comment_open);
+
+  // Move the joined text into the value that gets returned.
+  DA_DESTROY(cursor.data);
+  cursor.data = joined.data;
+  memset(&joined.data, 0, sizeof joined.data);
+
+  kit_status_t decoded = kit_xml_unescape_(&cursor.data);
+  if (decoded != KIT_OK)
+    cursor.status = decoded;
+
+  ib_destroy(joined);
+
+  return cursor;
+}
+
+// Parse a quoted string at `begin`: "..." or '...'.
+//
+// The closing quote must be the same character as the opening one.
+// The returned ib_t is the result of matching the closing quote, with
+// its data replaced by the text between the quotes after entity
+// decoding. Its status is not KIT_OK when the quotes could not be
+// matched or when the decoding failed.
+static ib_t kit_xml_parse_string_(ib_t begin) {
+  ib_t quotes_open = ib_exact(begin, SZ("\""));
+  ib_t apostr_open = ib_exact(begin, SZ("'"));
+
+  // `open` is only an alias of one of the two values above; it is
+  // released through them and must not be destroyed on its own.
+  ib_t open = quotes_open.status == KIT_OK ? quotes_open
+                                           : apostr_open;
+
+  ib_t text  = ib_until(open, WRAP_STR(open.data));
+  ib_t close = ib_exact(text, WRAP_STR(open.data));
+
+  // move
+  DA_DESTROY(close.data);
+  close.data = text.data;
+  memset(&text.data, 0, sizeof text.data);
+
+  // Only a decoding failure may change the status. A successful
+  // decoding must not hide an earlier failure to match the quotes.
+  kit_status_t s = kit_xml_unescape_(&close.data);
+  if (s != KIT_OK)
+    close.status = s;
+
+  ib_destroy(quotes_open);
+  ib_destroy(apostr_open);
+  ib_destroy(text);
+
+  return close;
+}
+
+static kit_xml_intermediate_t kit_xml_parse_buf_(  // parse leading text, then a run of tags, starting at `begin`
+ ib_t begin, kit_allocator_t *alloc) {  // `alloc` is passed to every DA_INIT below
+ kit_xml_intermediate_t res;
+ memset(&res, 0, sizeof res);
+
+ ib_t last, spaces;  // `last` holds the most recent parse result and its status
+ memset(&last, 0, sizeof last);
+ memset(&spaces, 0, sizeof spaces);
+
+ ib_t tag_text = kit_xml_parse_text_(begin);  // text that precedes the first tag
+ last = ib_copy(tag_text);
+
+ DA_INIT(res.tags, 0, alloc);
+
+ for (;;) {  // one iteration per tag at this nesting level
+ ib_t tagend_open = ib_exact(last, SZ("</"));  // a closing tag ends this level; matching it is left to the caller
+ ib_destroy(tagend_open);
+ if (tagend_open.status == KIT_OK)  // NOTE(review): status is read after ib_destroy; presumably fine because ib_t is held by value - confirm
+ break;
+
+ ib_t tag_open = ib_exact(last, SZ("<"));
+
+ if (tag_open.status != KIT_OK) {  // no further tag here: stop
+ ib_destroy(tag_open);
+ break;
+ }
+
+ xml_t tag;
+ memset(&tag, 0, sizeof tag);
+
+ ib_t decl_open = ib_exact(tag_open, SZ("?"));  // "<?" starts a declaration
+
+ ib_destroy(last);
+
+ if (decl_open.status == KIT_OK) {
+ tag.is_declaration = 1;
+ last = ib_copy(decl_open);
+ } else
+ last = ib_copy(tag_open);
+
+ ib_destroy(decl_open);
+
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));  // tag name; moved into tag.tag once the tag is complete
+ ib_destroy(spaces);
+
+ DA_INIT(tag.properties, 0, alloc);
+
+ ib_destroy(last);
+ last = ib_copy(tag_name);
+
+ for (;;) {  // one iteration per name="value" property
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));
+ ib_destroy(spaces);
+
+ if (property.status != KIT_OK || property.data.size == 0) {  // no property name found: end of the property list
+ ib_destroy(property);
+ break;
+ }
+
+ spaces = ib_any(property, SZ(" \t\r\n"));
+ ib_t equals = ib_exact(spaces, SZ("="));
+ ib_destroy(spaces);
+ spaces = ib_any(equals, SZ(" \t\r\n"));
+ ib_t value = kit_xml_parse_string_(spaces);  // quoted value, entities decoded
+ ib_destroy(spaces);
+
+ ib_destroy(last);
+ last = ib_copy(value);
+
+ if (last.status == KIT_OK) {  // store the property as a new last element
+ i64 n = tag.properties.size;
+ DA_RESIZE(tag.properties, n + 1);
+
+ assert(tag.properties.size == n + 1);
+ if (tag.properties.size != n + 1) {
+ last.status = KIT_ERROR_BAD_ALLOC;
+ DA_DESTROY(tag.properties);  // NOTE(review): names/values stored earlier are not destroyed individually here - possible leak, confirm
+ } else {
+ // move
+ tag.properties.values[n].name = property.data;
+ memset(&property.data, 0, sizeof property.data);
+
+ // move
+ tag.properties.values[n].value = value.data;
+ memset(&value.data, 0, sizeof value.data);
+ }
+ }
+
+ ib_destroy(property);
+ ib_destroy(equals);
+ ib_destroy(value);
+ }
+
+ spaces = ib_any(last, SZ(" \t\r\n"));
+
+ if (tag.is_declaration) {  // a declaration must close with "?>"; it gets empty text and children
+ ib_t tag_decl_close = ib_exact(spaces, SZ("?>"));
+ ib_destroy(spaces);
+
+ ib_destroy(last);
+ last = tag_decl_close;
+
+ DA_INIT(tag.text, 0, alloc);
+ DA_INIT(tag.children, 0, alloc);
+ } else {
+ ib_t tag_close = ib_exact(spaces, SZ(">"));  // ">" is followed by element content
+ ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));  // "/>" ends a tag that has no content
+ ib_destroy(spaces);
+
+ if (tag_close.status == KIT_OK) {
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,  // recurse to parse the element content
+ alloc);
+ tag.text = im.text;
+ tag.children = im.tags;
+
+ tagend_open = ib_exact(im.last, SZ("</"));  // expect "</", then the same tag name, then ">"
+ ib_destroy(im.last);
+ spaces = ib_any(tagend_open, SZ(" \t\r\n"));
+ ib_t tagend_name = ib_exact(spaces, WRAP_STR(tag_name.data));
+ ib_destroy(spaces);
+ spaces = ib_any(tagend_name, SZ(" \t\r\n"));
+ ib_t tagend_close = ib_exact(spaces, SZ(">"));
+ ib_destroy(spaces);
+ ib_destroy(tagend_open);
+ ib_destroy(tagend_name);
+
+ ib_destroy(last);
+ last = tagend_close;
+
+ } else if (tag_close_empty.status == KIT_OK) {
+ ib_destroy(last);
+ last = ib_copy(tag_close_empty);
+
+ DA_INIT(tag.text, 0, alloc);
+ DA_INIT(tag.children, 0, alloc);
+ } else
+ last.status = KIT_ERROR_INTERNAL;  // neither ">" nor "/>" matched
+
+ ib_destroy(tag_close);
+ ib_destroy(tag_close_empty);
+ }
+
+ ib_t tag_tail = kit_xml_parse_text_(last);  // text that follows this tag
+
+ ib_destroy(last);
+ last = ib_copy(tag_tail);
+
+ if (last.status == KIT_OK) {  // append the finished tag to res.tags
+ i64 n = res.tags.size;
+ DA_RESIZE(res.tags, n + 1);
+
+ assert(res.tags.size == n + 1);
+ if (res.tags.size != n + 1) {
+ last.status = KIT_ERROR_BAD_ALLOC;
+ xml_destroy(&tag);
+ } else {
+ // move
+ tag.tag = tag_name.data;
+ memset(&tag_name.data, 0, sizeof tag_name.data);
+
+ // move
+ tag.tail = tag_tail.data;
+ memset(&tag_tail.data, 0, sizeof tag_tail.data);
+
+ res.tags.values[n] = tag;
+ }
+ } else  // parsing failed: release the partially built tag
+ xml_destroy(&tag);
+
+ ib_destroy(tag_open);
+ ib_destroy(tag_name);
+ ib_destroy(tag_tail);
+ }
+
+ if (last.status != KIT_OK) {  // on failure release everything collected at this level
+ for (i64 i = 0; i < res.tags.size; i++)
+ xml_destroy(res.tags.values + i);
+ DA_DESTROY(res.text);
+ DA_DESTROY(res.tags);
+ } else {
+ // move
+ res.text = tag_text.data;
+ memset(&tag_text.data, 0, sizeof tag_text.data);
+ }
+
+ ib_destroy(tag_text);
+
+ res.last = last;  // hands the final parse result and status to the caller
+ return res;
+}
+
+// Parse XML from the input stream `is`, using `alloc` for all storage.
+//
+// The status of the result is the status of the underlying parse. If
+// it is not KIT_OK, the rest of the result is zero-filled.
+//
+// When the input consists of exactly one top-level tag with no text
+// before it, that tag is returned as the root. Otherwise the result is
+// a root with an empty tag name, empty tail and no properties, whose
+// text and children are everything that was parsed at the top level.
+kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
+                                     kit_allocator_t *alloc) {
+  kit_xml_parse_result_t result;
+  memset(&result, 0, sizeof result);
+
+  ib_t                   input  = ib_wrap(is, alloc);
+  kit_xml_intermediate_t parsed = kit_xml_parse_buf_(input, alloc);
+  ib_destroy(input);
+
+  result.status = parsed.last.status;
+  ib_destroy(parsed.last);
+
+  if (result.status == KIT_OK) {
+    if (parsed.text.size == 0 && parsed.tags.size == 1) {
+      // The only tag becomes the root; only the containers that held
+      // it are released.
+      result.xml = parsed.tags.values[0];
+      DA_DESTROY(parsed.text);
+      DA_DESTROY(parsed.tags);
+    } else {
+      DA_INIT(result.xml.tag, 0, alloc);
+      DA_INIT(result.xml.tail, 0, alloc);
+      DA_INIT(result.xml.properties, 0, alloc);
+
+      result.xml.text     = parsed.text;
+      result.xml.children = parsed.tags;
+    }
+  }
+
+  return result;
+}
+
+// Stub: no output is produced. The returned value is zero-filled
+// except for its status, which is always KIT_ERROR_NOT_IMPLEMENTED.
+// `alloc` is not used.
+kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
+  assert(xml != NULL);
+
+  xml_text_t unsupported;
+  memset(&unsupported, 0, sizeof unsupported);
+  unsupported.status = KIT_ERROR_NOT_IMPLEMENTED;
+
+  return unsupported;
+}
+
+// Append to `buf` the text of `xml`, followed for each child, in
+// order, by the full text of that child and then the child's tail.
+//
+// Returns KIT_OK, or KIT_ERROR_BAD_ALLOC when `buf` could not be
+// grown. Text appended before a failure stays in `buf`.
+static kit_status_t kit_xml_append_text_(str_builder_t *buf,
+                                         xml_t         *xml) {
+  assert(buf != NULL);
+  assert(xml != NULL);
+
+  i64 offset = buf->size;
+  DA_RESIZE(*buf, offset + xml->text.size);
+
+  assert(buf->size == offset + xml->text.size);
+  if (buf->size != offset + xml->text.size)
+    return KIT_ERROR_BAD_ALLOC;
+
+  if (xml->text.size > 0)
+    memcpy(buf->values + offset, xml->text.values, xml->text.size);
+
+  for (i64 k = 0; k < xml->children.size; k++) {
+    xml_t *child = xml->children.values + k;
+
+    kit_status_t status = kit_xml_append_text_(buf, child);
+    if (status != KIT_OK)
+      return status;
+
+    str_t tail = WRAP_STR(child->tail);
+
+    if (tail.size > 0) {
+      offset = buf->size;
+      DA_RESIZE(*buf, offset + tail.size);
+
+      assert(buf->size == offset + tail.size);
+      if (buf->size != offset + tail.size)
+        return KIT_ERROR_BAD_ALLOC;
+
+      memcpy(buf->values + offset, tail.values, tail.size);
+    }
+  }
+
+  return KIT_OK;
+}
+
+// Collect the text of `xml` and of all its descendants, tails of the
+// children included, into a new string allocated with `alloc`.
+//
+// The status is KIT_ERROR_INVALID_ARGUMENT when `xml` is NULL,
+// otherwise whatever kit_xml_append_text_ reports. The text member is
+// initialized in every case.
+kit_xml_text_t kit_xml_full_text(kit_xml_t       *xml,
+                                 kit_allocator_t *alloc) {
+  kit_xml_text_t collected;
+  DA_INIT(collected.text, 0, alloc);
+
+  collected.status = xml == NULL
+                         ? KIT_ERROR_INVALID_ARGUMENT
+                         : kit_xml_append_text_(&collected.text, xml);
+
+  return collected;
+}
+
+// Release every array owned by `xml`: the name and value of each
+// property, the whole subtree of children, and the tag, text, tail,
+// properties and children arrays themselves. The `xml` object itself
+// is not freed.
+//
+// A NULL argument trips the assert when asserts are enabled and is
+// ignored otherwise.
+void kit_xml_destroy(kit_xml_t *xml) {
+  assert(xml != NULL);
+  if (xml == NULL)
+    return;
+
+  for (i64 k = 0; k < xml->properties.size; k++) {
+    DA_DESTROY(xml->properties.values[k].name);
+    DA_DESTROY(xml->properties.values[k].value);
+  }
+
+  // Children are released recursively before their container.
+  for (i64 k = 0; k < xml->children.size; k++)
+    kit_xml_destroy(&xml->children.values[k]);
+
+  DA_DESTROY(xml->tag);
+  DA_DESTROY(xml->text);
+  DA_DESTROY(xml->tail);
+
+  DA_DESTROY(xml->properties);
+  DA_DESTROY(xml->children);
+}