diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2023-09-19 05:34:00 +0200 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2023-09-19 05:34:00 +0200 |
commit | 18c419ffb4e750c3c9ea8570cf18f3099267b1bb (patch) | |
tree | a1d936834a54934cb4407f870d2ac1dbaa87cd3b /source/kit/xml.c | |
parent | 8499b75fb011d72eeb5acbd85bcf41e4ee51e9a7 (diff) | |
download | kit-18c419ffb4e750c3c9ea8570cf18f3099267b1bb.zip |
Update xml parsing
Diffstat (limited to 'source/kit/xml.c')
-rw-r--r-- | source/kit/xml.c | 230 |
1 files changed, 182 insertions, 48 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c index bb16192..2bef6d5 100644 --- a/source/kit/xml.c +++ b/source/kit/xml.c @@ -3,73 +3,197 @@ #include "input_buffer.h" #include <assert.h> -kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, - kit_allocator_t *alloc) { - xml_parse_result_t res; +typedef struct { + ib_t last; + kit_xml_t xml; +} kit_xml_intermediate_t; + +static kit_xml_intermediate_t kit_xml_parse_buf_( + ib_t begin, kit_allocator_t *alloc) { + kit_xml_intermediate_t res; memset(&res, 0, sizeof res); - ib_t begin = ib_wrap(is, alloc); - ib_t tag_before = ib_until(begin, SZ("<")); - ib_t tag_open = ib_exact(tag_before, SZ("<")); - ib_t tag_name = ib_until(tag_open, SZ(">")); - ib_t tag_name_empty = ib_until(tag_open, SZ("/")); - -#define return_ \ - ib_destroy(begin); \ - ib_destroy(tag_before); \ - ib_destroy(tag_open); \ - ib_destroy(tag_name); \ - ib_destroy(tag_name_empty); \ - ib_destroy(tag_close); \ + ib_t last, spaces; + memset(&last, 0, sizeof last); + memset(&spaces, 0, sizeof spaces); + + ib_t tag_open = ib_exact(begin, SZ("<")); + ib_t tag_name = ib_none(tag_open, SZ(" \t\r\n/>")); + last = ib_copy(tag_name); + + DA_INIT(res.xml.properties, 0, alloc); + + for (;;) { + spaces = ib_any(last, SZ(" \t\r\n")); + ib_t property = ib_none(spaces, SZ(" \t\r\n=/>")); + ib_destroy(spaces); + + if (property.status != KIT_OK || property.data.size == 0) { + ib_destroy(property); + break; + } + + spaces = ib_any(property, SZ(" \t\r\n")); + ib_t equals = ib_exact(spaces, SZ("=")); + ib_destroy(spaces); + spaces = ib_any(equals, SZ(" \t\r\n")); + ib_t value_open = ib_exact(spaces, SZ("\"")); + ib_t value_text = ib_until(value_open, SZ("\"")); + ib_t value_close = ib_exact(value_text, SZ("\"")); + + if (value_close.status == KIT_OK) { + i64 n = res.xml.properties.size; + DA_RESIZE(res.xml.properties, n + 1); + + if (res.xml.properties.size != n + 1) { + res.last = value_close; + res.last.status = KIT_ERROR_BAD_ALLOC; + + ib_destroy(begin); + ib_destroy(tag_open); + ib_destroy(tag_name); + ib_destroy(last); + ib_destroy(spaces); + ib_destroy(property); + ib_destroy(equals); + ib_destroy(value_open); + ib_destroy(value_text); + + DA_DESTROY(res.xml.properties); + return res; + } + + // move + res.xml.properties.values[n].name = property.data; + memset(&property.data, 0, sizeof property.data); + + // move + res.xml.properties.values[n].value = value_text.data; + memset(&value_text.data, 0, sizeof value_text.data); + } + + ib_destroy(last); + last = ib_copy(value_close); + + ib_destroy(spaces); + ib_destroy(property); + ib_destroy(equals); + ib_destroy(value_open); + ib_destroy(value_text); + ib_destroy(value_close); + + if (value_close.status != KIT_OK) + break; + } + + spaces = ib_any(last, SZ(" \t\r\n")); + ib_t tag_close = ib_exact(spaces, SZ(">")); + ib_t tag_close_empty = ib_exact(spaces, SZ("/>")); + + ib_destroy(last); + ib_destroy(spaces); + +#define return_ \ + ib_destroy(begin); \ + ib_destroy(tag_open); \ + ib_destroy(tag_name); \ + ib_destroy(tag_close); \ + ib_destroy(tag_close_empty); \ return - if (tag_name_empty.offset < tag_name.offset) { - ib_t tag_close = ib_exact(tag_name_empty, SZ("/>")); + if (tag_close_empty.status == KIT_OK) { + ib_t tag_tail = ib_until(tag_close_empty, SZ("<")); + + if (tag_tail.status != KIT_OK) { + res.last = tag_tail; - if (tag_close.status != KIT_OK) { - res.status = KIT_ERROR_INTERNAL; + DA_DESTROY(res.xml.properties); return_ res; } // move - res.xml.tag = tag_name_empty.data; - memset(&tag_name_empty.data, 0, sizeof tag_name_empty.data); + res.xml.tag = tag_name.data; + memset(&tag_name.data, 0, sizeof tag_name.data); - while (res.xml.tag.size > 0 && - res.xml.tag.values[res.xml.tag.size - 1] == ' ') - --res.xml.tag.size; + // move + res.xml.tail = tag_tail.data; + memset(&tag_tail.data, 0, sizeof tag_tail.data); DA_INIT(res.xml.text, 0, alloc); - DA_INIT(res.xml.tail, 0, alloc); - DA_INIT(res.xml.properties, 0, alloc); DA_INIT(res.xml.children, 0, alloc); - res.status = KIT_OK; + res.last = tag_tail; return_ res; } - ib_t tag_close = ib_exact(tag_name, SZ(">")); - ib_t tag_text = ib_until(tag_close, SZ("<")); - ib_t tagend_open = ib_exact(tag_text, SZ("</")); + ib_t tag_text = ib_until(tag_close, SZ("<")); + last = ib_copy(tag_text); + + DA_INIT(res.xml.children, 0, alloc); + + if (last.status == KIT_OK) + for (;;) { + // TODO + // Refactor: make this outer loop. + // + + ib_t next = ib_exact(last, SZ("</")); + + if (next.status == KIT_OK) { + ib_destroy(last); + last = next; + break; + } + + ib_destroy(next); + + kit_xml_intermediate_t im = kit_xml_parse_buf_(last, alloc); + + last = im.last; + + if (last.status != KIT_OK) + break; + + i64 n = res.xml.children.size; + DA_RESIZE(res.xml.children, n + 1); + + if (res.xml.children.size != n + 1) { + res.last = last; + res.last.status = KIT_ERROR_BAD_ALLOC; + + ib_destroy(tag_text); + + DA_DESTROY(res.xml.properties); + DA_DESTROY(res.xml.children); + return_ res; + } + + res.xml.children.values[n] = im.xml; + } + + ib_t tagend_open = last; ib_t tagend_name = ib_exact(tagend_open, WRAP_STR(tag_name.data)); ib_t tagend_close = ib_exact(tagend_name, SZ(">")); + ib_t tag_tail = ib_until(tagend_close, SZ("<")); #undef return_ -#define return_ \ - ib_destroy(begin); \ - ib_destroy(tag_before); \ - ib_destroy(tag_open); \ - ib_destroy(tag_name); \ - ib_destroy(tag_name_empty); \ - ib_destroy(tag_close); \ - ib_destroy(tag_text); \ - ib_destroy(tagend_open); \ - ib_destroy(tagend_name); \ - ib_destroy(tagend_close); \ +#define return_ \ + ib_destroy(begin); \ + ib_destroy(tag_open); \ + ib_destroy(tag_name); \ + ib_destroy(tag_close); \ + ib_destroy(tag_close_empty); \ + ib_destroy(tag_text); \ + ib_destroy(tagend_open); \ + ib_destroy(tagend_name); \ + ib_destroy(tagend_close); \ return - if (tagend_close.status != KIT_OK) { - res.status = KIT_ERROR_INTERNAL; + if (tag_tail.status != KIT_OK) { + res.last = tag_tail; + + DA_DESTROY(res.xml.properties); + DA_DESTROY(res.xml.children); return_ res; } @@ -81,15 +205,25 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, res.xml.text = tag_text.data; memset(&tag_text.data, 0, sizeof tag_text.data); - DA_INIT(res.xml.tail, 0, alloc); - DA_INIT(res.xml.properties, 0, alloc); - DA_INIT(res.xml.children, 0, alloc); + // move + res.xml.tail = tag_tail.data; + memset(&tag_tail.data, 0, sizeof tag_tail.data); - res.status = KIT_OK; + res.last = tag_tail; return_ res; #undef return_ } +kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, + kit_allocator_t *alloc) { + kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_wrap(is, alloc), + alloc); + kit_xml_parse_result_t res = { .status = im.last.status, + .xml = im.xml }; + ib_destroy(im.last); + return res; +} + kit_xml_print_result_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) { assert(xml != NULL); |