summary refs log tree commit diff
path: root/source/kit/xml.c
diff options
context:
space:
mode:
author Mitya Selivanov <automainint@guattari.tech> 2023-09-19 05:34:00 +0200
committer Mitya Selivanov <automainint@guattari.tech> 2023-09-19 05:34:00 +0200
commit 18c419ffb4e750c3c9ea8570cf18f3099267b1bb (patch)
tree a1d936834a54934cb4407f870d2ac1dbaa87cd3b /source/kit/xml.c
parent 8499b75fb011d72eeb5acbd85bcf41e4ee51e9a7 (diff)
download kit-18c419ffb4e750c3c9ea8570cf18f3099267b1bb.zip
Update xml parsing
Diffstat (limited to 'source/kit/xml.c')
-rw-r--r-- source/kit/xml.c 230
1 files changed, 182 insertions, 48 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c
index bb16192..2bef6d5 100644
--- a/source/kit/xml.c
+++ b/source/kit/xml.c
@@ -3,73 +3,197 @@
#include "input_buffer.h"
#include <assert.h>
-kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
- kit_allocator_t *alloc) {
- xml_parse_result_t res;
+typedef struct {
+ ib_t last;
+ kit_xml_t xml;
+} kit_xml_intermediate_t;
+
+static kit_xml_intermediate_t kit_xml_parse_buf_(
+ ib_t begin, kit_allocator_t *alloc) {
+ kit_xml_intermediate_t res;
memset(&res, 0, sizeof res);
- ib_t begin = ib_wrap(is, alloc);
- ib_t tag_before = ib_until(begin, SZ("<"));
- ib_t tag_open = ib_exact(tag_before, SZ("<"));
- ib_t tag_name = ib_until(tag_open, SZ(">"));
- ib_t tag_name_empty = ib_until(tag_open, SZ("/"));
-
-#define return_ \
- ib_destroy(begin); \
- ib_destroy(tag_before); \
- ib_destroy(tag_open); \
- ib_destroy(tag_name); \
- ib_destroy(tag_name_empty); \
- ib_destroy(tag_close); \
+ ib_t last, spaces;
+ memset(&last, 0, sizeof last);
+ memset(&spaces, 0, sizeof spaces);
+
+ ib_t tag_open = ib_exact(begin, SZ("<"));
+ ib_t tag_name = ib_none(tag_open, SZ(" \t\r\n/>"));
+ last = ib_copy(tag_name);
+
+ DA_INIT(res.xml.properties, 0, alloc);
+
+ for (;;) {
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t property = ib_none(spaces, SZ(" \t\r\n=/>"));
+ ib_destroy(spaces);
+
+ if (property.status != KIT_OK || property.data.size == 0) {
+ ib_destroy(property);
+ break;
+ }
+
+ spaces = ib_any(property, SZ(" \t\r\n"));
+ ib_t equals = ib_exact(spaces, SZ("="));
+ ib_destroy(spaces);
+ spaces = ib_any(equals, SZ(" \t\r\n"));
+ ib_t value_open = ib_exact(spaces, SZ("\""));
+ ib_t value_text = ib_until(value_open, SZ("\""));
+ ib_t value_close = ib_exact(value_text, SZ("\""));
+
+ if (value_close.status == KIT_OK) {
+ i64 n = res.xml.properties.size;
+ DA_RESIZE(res.xml.properties, n + 1);
+
+ if (res.xml.properties.size != n + 1) {
+ res.last = value_close;
+ res.last.status = KIT_ERROR_BAD_ALLOC;
+
+ ib_destroy(begin);
+ ib_destroy(tag_open);
+ ib_destroy(tag_name);
+ ib_destroy(last);
+ ib_destroy(spaces);
+ ib_destroy(property);
+ ib_destroy(equals);
+ ib_destroy(value_open);
+ ib_destroy(value_text);
+
+ DA_DESTROY(res.xml.properties);
+ return res;
+ }
+
+ // move
+ res.xml.properties.values[n].name = property.data;
+ memset(&property.data, 0, sizeof property.data);
+
+ // move
+ res.xml.properties.values[n].value = value_text.data;
+ memset(&value_text.data, 0, sizeof value_text.data);
+ }
+
+ ib_destroy(last);
+ last = ib_copy(value_close);
+
+ ib_destroy(spaces);
+ ib_destroy(property);
+ ib_destroy(equals);
+ ib_destroy(value_open);
+ ib_destroy(value_text);
+ ib_destroy(value_close);
+
+ if (value_close.status != KIT_OK)
+ break;
+ }
+
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t tag_close = ib_exact(spaces, SZ(">"));
+ ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));
+
+ ib_destroy(last);
+ ib_destroy(spaces);
+
+#define return_ \
+ ib_destroy(begin); \
+ ib_destroy(tag_open); \
+ ib_destroy(tag_name); \
+ ib_destroy(tag_close); \
+ ib_destroy(tag_close_empty); \
return
- if (tag_name_empty.offset < tag_name.offset) {
- ib_t tag_close = ib_exact(tag_name_empty, SZ("/>"));
+ if (tag_close_empty.status == KIT_OK) {
+ ib_t tag_tail = ib_until(tag_close_empty, SZ("<"));
+
+ if (tag_tail.status != KIT_OK) {
+ res.last = tag_tail;
- if (tag_close.status != KIT_OK) {
- res.status = KIT_ERROR_INTERNAL;
+ DA_DESTROY(res.xml.properties);
return_ res;
}
// move
- res.xml.tag = tag_name_empty.data;
- memset(&tag_name_empty.data, 0, sizeof tag_name_empty.data);
+ res.xml.tag = tag_name.data;
+ memset(&tag_name.data, 0, sizeof tag_name.data);
- while (res.xml.tag.size > 0 &&
- res.xml.tag.values[res.xml.tag.size - 1] == ' ')
- --res.xml.tag.size;
+ // move
+ res.xml.tail = tag_tail.data;
+ memset(&tag_tail.data, 0, sizeof tag_tail.data);
DA_INIT(res.xml.text, 0, alloc);
- DA_INIT(res.xml.tail, 0, alloc);
- DA_INIT(res.xml.properties, 0, alloc);
DA_INIT(res.xml.children, 0, alloc);
- res.status = KIT_OK;
+ res.last = tag_tail;
return_ res;
}
- ib_t tag_close = ib_exact(tag_name, SZ(">"));
- ib_t tag_text = ib_until(tag_close, SZ("<"));
- ib_t tagend_open = ib_exact(tag_text, SZ("</"));
+ ib_t tag_text = ib_until(tag_close, SZ("<"));
+ last = ib_copy(tag_text);
+
+ DA_INIT(res.xml.children, 0, alloc);
+
+ if (last.status == KIT_OK)
+ for (;;) {
+ // TODO
+ // Refactor: make this outer loop.
+ //
+
+ ib_t next = ib_exact(last, SZ("</"));
+
+ if (next.status == KIT_OK) {
+ ib_destroy(last);
+ last = next;
+ break;
+ }
+
+ ib_destroy(next);
+
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(last, alloc);
+
+ last = im.last;
+
+ if (last.status != KIT_OK)
+ break;
+
+ i64 n = res.xml.children.size;
+ DA_RESIZE(res.xml.children, n + 1);
+
+ if (res.xml.children.size != n + 1) {
+ res.last = last;
+ res.last.status = KIT_ERROR_BAD_ALLOC;
+
+ ib_destroy(tag_text);
+
+ DA_DESTROY(res.xml.properties);
+ DA_DESTROY(res.xml.children);
+ return_ res;
+ }
+
+ res.xml.children.values[n] = im.xml;
+ }
+
+ ib_t tagend_open = last;
ib_t tagend_name = ib_exact(tagend_open, WRAP_STR(tag_name.data));
ib_t tagend_close = ib_exact(tagend_name, SZ(">"));
+ ib_t tag_tail = ib_until(tagend_close, SZ("<"));
#undef return_
-#define return_ \
- ib_destroy(begin); \
- ib_destroy(tag_before); \
- ib_destroy(tag_open); \
- ib_destroy(tag_name); \
- ib_destroy(tag_name_empty); \
- ib_destroy(tag_close); \
- ib_destroy(tag_text); \
- ib_destroy(tagend_open); \
- ib_destroy(tagend_name); \
- ib_destroy(tagend_close); \
+#define return_ \
+ ib_destroy(begin); \
+ ib_destroy(tag_open); \
+ ib_destroy(tag_name); \
+ ib_destroy(tag_close); \
+ ib_destroy(tag_close_empty); \
+ ib_destroy(tag_text); \
+ ib_destroy(tagend_open); \
+ ib_destroy(tagend_name); \
+ ib_destroy(tagend_close); \
return
- if (tagend_close.status != KIT_OK) {
- res.status = KIT_ERROR_INTERNAL;
+ if (tag_tail.status != KIT_OK) {
+ res.last = tag_tail;
+
+ DA_DESTROY(res.xml.properties);
+ DA_DESTROY(res.xml.children);
return_ res;
}
@@ -81,15 +205,25 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
res.xml.text = tag_text.data;
memset(&tag_text.data, 0, sizeof tag_text.data);
- DA_INIT(res.xml.tail, 0, alloc);
- DA_INIT(res.xml.properties, 0, alloc);
- DA_INIT(res.xml.children, 0, alloc);
+ // move
+ res.xml.tail = tag_tail.data;
+ memset(&tag_tail.data, 0, sizeof tag_tail.data);
- res.status = KIT_OK;
+ res.last = tag_tail;
return_ res;
#undef return_
}
+kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
+ kit_allocator_t *alloc) {
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_wrap(is, alloc),
+ alloc);
+ kit_xml_parse_result_t res = { .status = im.last.status,
+ .xml = im.xml };
+ ib_destroy(im.last);
+ return res;
+}
+
kit_xml_print_result_t kit_xml_print(kit_xml_t *xml,
kit_allocator_t *alloc) {
assert(xml != NULL);