summary | refs | log | tree | commit | diff
path: root/kit/xml.c
diff options
context:
space:
mode:
author: Mitya Selivanov <automainint@guattari.tech> 2024-11-17 05:24:04 +0100
committer: Mitya Selivanov <automainint@guattari.tech> 2024-11-17 05:24:04 +0100
commit: d5a727a062e4d727491f4e7f047269d60a93a8cf (patch)
tree: 565aec155ab056f1bef8ddc7dc78d52e65aa76b7 /kit/xml.c
parent: 4ba961dcbf60f7c12f53f46baa261757e15d5931 (diff)
download: saw-d5a727a062e4d727491f4e7f047269d60a93a8cf.zip
Remove lots of dependencies
Diffstat (limited to 'kit/xml.c')
-rw-r--r-- kit/xml.c | 483
1 files changed, 0 insertions, 483 deletions
diff --git a/kit/xml.c b/kit/xml.c
deleted file mode 100644
index f61bfc1..0000000
--- a/kit/xml.c
+++ /dev/null
@@ -1,483 +0,0 @@
-#include "xml.h"
-
-#include "input_buffer.h"
-#include <assert.h>
-
-typedef struct { // Value returned by kit_xml_parse_buf_.
- ib_token_t last; // Last token produced; its status is the parse status.
- str_builder_t text; // Unescaped text found before the first tag.
- kit_da_xml_t tags; // Tags parsed at this nesting level, in input order.
-} kit_xml_intermediate_t;
-
-/* Decodes the digits of a numeric character reference into one byte.
- *
- * digits, count - characters between "&#" (or "&#x") and the closing ';'.
- * base          - 10 or 16.
- * out           - receives the decoded byte on success.
- *
- * Returns 1 on success. Returns 0 when there are no digits, when a
- * character is not a digit in the given base, or when the value is 0 or
- * greater than 255 (code points that would need a multi-byte UTF-8
- * encoding are not supported yet).
- */
-static b8 kit_xml_char_ref_(c8 const *digits, i64 count, u64 base,
-                            c8 *out) {
-  if (count <= 0)
-    return 0;
-
-  u64 x = 0;
-
-  for (i64 k = 0; k < count; k++) {
-    c8  c = digits[k];
-    u64 d = 0;
-
-    if (c >= '0' && c <= '9')
-      d = (u64) (c - '0');
-    else if (base == 16 && c >= 'a' && c <= 'f')
-      d = 10 + (u64) (c - 'a');
-    else if (base == 16 && c >= 'A' && c <= 'F')
-      d = 10 + (u64) (c - 'A');
-    else
-      return 0;
-
-    x = x * base + d;
-
-    // Bail out as soon as the value leaves the single-byte range, so an
-    // arbitrarily long run of digits can never overflow x.
-    if (x > 255u)
-      return 0;
-  }
-
-  if (x == 0)
-    return 0;
-
-  *out = (c8) x;
-  return 1;
-}
-
-/* Initializes *dst with `alloc` and fills it with a copy of `str` in which
- * XML references are replaced by the characters they stand for.
- *
- * Supported references: &lt; &gt; &amp; &quot; &apos; and the numeric
- * forms &#NNN; (decimal) and &#xHH; (hexadecimal) for values 1..255.
- *
- * The output is never longer than the input, so the destination is
- * allocated once with str.size elements and then refilled from the start.
- *
- * Returns KIT_OK on success, KIT_ERROR_INTERNAL for invalid arguments,
- * KIT_ERROR_OUT_OF_MEMORY when the allocation did not produce the
- * requested size, and KIT_PARSING_FAILED for an unterminated, unknown or
- * out-of-range reference. On KIT_PARSING_FAILED *dst is destroyed.
- */
-static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str,
-                                       kit_allocator_t *alloc) {
-  assert(dst != NULL);
-  assert(str.size == 0 || str.values != NULL);
-
-  if (dst == NULL)
-    return KIT_ERROR_INTERNAL;
-  if (str.size != 0 && str.values == NULL)
-    return KIT_ERROR_INTERNAL;
-
-  DA_INIT(*dst, str.size, alloc);
-
-  if (dst->size != str.size)
-    return KIT_ERROR_OUT_OF_MEMORY;
-
-  dst->size = 0;
-
-  for (i64 i = 0; i < str.size; i++) {
-    if (str.values[i] != '&') {
-      dst->values[dst->size++] = str.values[i];
-      continue;
-    }
-
-    // n is the offset of the terminating ';' relative to the '&'.
-    i64 n = 1;
-    while (i + n < str.size && str.values[i + n] != ';')
-      n++;
-
-    c8 const *ref = str.values + i;
-    c8        ch  = 0;
-    b8        ok  = 0;
-
-    if (i + n >= str.size)
-      ok = 0; // no terminating ';'
-    else if (n == 3 && memcmp(ref, "&lt;", 4) == 0) {
-      ch = '<';
-      ok = 1;
-    } else if (n == 3 && memcmp(ref, "&gt;", 4) == 0) {
-      ch = '>';
-      ok = 1;
-    } else if (n == 4 && memcmp(ref, "&amp;", 5) == 0) {
-      ch = '&';
-      ok = 1;
-    } else if (n == 5 && memcmp(ref, "&quot;", 6) == 0) {
-      ch = '"';
-      ok = 1;
-    } else if (n == 5 && memcmp(ref, "&apos;", 6) == 0) {
-      ch = '\'';
-      ok = 1;
-    } else if (n >= 3 && ref[1] == '#' && ref[2] == 'x')
-      // Hex digits occupy ref[3] .. ref[n - 1]; ref[n] is the ';'.
-      ok = kit_xml_char_ref_(ref + 3, n - 3, 16, &ch);
-    else if (n >= 2 && ref[1] == '#')
-      // Decimal digits occupy ref[2] .. ref[n - 1].
-      ok = kit_xml_char_ref_(ref + 2, n - 2, 10, &ch);
-
-    if (!ok) {
-      DA_DESTROY(*dst);
-      return KIT_PARSING_FAILED;
-    }
-
-    dst->values[dst->size++] = ch;
-
-    // Skip the rest of the reference; the loop increment steps over ';'.
-    i += n;
-  }
-
-  return KIT_OK;
-}
-
-/* Collects character data starting at `begin` into *dst, skipping over
- * `<!-- ... -->` comments, and stops in front of the first `<` that does
- * not open a comment.
- *
- * *dst is resized to hold the text; its previous contents are overwritten.
- * Returns the token of the last text run. KIT_ERROR_OUT_OF_MEMORY is OR-ed
- * into its status when *dst could not be resized.
- */
-static ib_token_t kit_xml_parse_text_(ib_token_t     begin,
-                                      str_builder_t *dst) {
-  ib_token_t cur = ib_until(begin, SZ("<"));
-
-  DA_RESIZE(*dst, cur.size);
-
-  assert(dst->size == cur.size);
-  if (dst->size != cur.size)
-    cur.status |= KIT_ERROR_OUT_OF_MEMORY;
-  else if (cur.size > 0)
-    memcpy(dst->values, ib_str(cur).values, cur.size);
-
-  ib_token_t opening = ib_exact(cur, SZ("<!--"));
-
-  while (opening.status == KIT_OK) {
-    ib_token_t body    = ib_until(opening, SZ("-->"));
-    ib_token_t closing = ib_exact(body, SZ("-->"));
-    ib_token_t chunk   = ib_until(closing, SZ("<"));
-
-    if (chunk.status == KIT_OK && chunk.size > 0) {
-      i64 old_size = dst->size;
-      i64 new_size = old_size + chunk.size;
-
-      DA_RESIZE(*dst, new_size);
-
-      assert(dst->size == new_size);
-      if (dst->size == new_size) {
-        str_t piece = ib_str(chunk);
-        memcpy(dst->values + old_size, piece.values, piece.size);
-      } else
-        chunk.status |= KIT_ERROR_OUT_OF_MEMORY;
-    }
-
-    cur     = chunk;
-    opening = ib_exact(cur, SZ("<!--"));
-  }
-
-  return cur;
-}
-
-/* Parses a string literal delimited by a pair of `"` or a pair of `'`.
- *
- * begin - token after which the opening delimiter is expected.
- * value - receives the token that spans the text between the delimiters.
- *
- * Returns the token of the closing delimiter. When `value` is NULL,
- * returns `begin` with KIT_ERROR_INTERNAL OR-ed into its status.
- */
-static ib_token_t kit_xml_parse_string_(ib_token_t  begin,
-                                        ib_token_t *value) {
-  assert(value != NULL);
-  if (value == NULL) {
-    begin.status |= KIT_ERROR_INTERNAL;
-    return begin;
-  }
-
-  ib_token_t double_quote = ib_exact(begin, SZ("\""));
-  ib_token_t single_quote = ib_exact(begin, SZ("'"));
-
-  // Prefer the double quote; otherwise fall back to the apostrophe token,
-  // whatever its status is.
-  ib_token_t opening = single_quote;
-  if (double_quote.status == KIT_OK)
-    opening = double_quote;
-
-  // The delimiter text is fetched again for every call rather than cached.
-  *value = ib_until(opening, ib_str(opening));
-
-  return ib_exact(*value, ib_str(opening));
-}
-/* Parses one nesting level of XML starting at `begin`: the leading text,
- * then a sequence of tags, each followed by its tail text.
- *
- * The tag loop stops at a closing tag opener `</`, at anything that is not
- * a tag opener `<`, or when a token carries a non-OK status. For a tag
- * closed with `>` the function calls itself to parse the nested content
- * and then expects the matching `</name>`.
- *
- * Returns the unescaped leading text, the parsed tags and the last token.
- * When the tag loop ends with a non-OK status, the collected tags and the
- * text are destroyed before returning.
- * NOTE(review): if only the final unescape of the leading text fails,
- * res.tags is returned still populated together with a non-OK status.
- */
-static kit_xml_intermediate_t kit_xml_parse_buf_(
- ib_token_t begin, kit_allocator_t *alloc) {
- kit_xml_intermediate_t res;
- memset(&res, 0, sizeof res);
-
- ib_token_t last, spaces;
- memset(&last, 0, sizeof last);
- memset(&spaces, 0, sizeof spaces);
- // Scratch buffers for raw, still escaped text; the tail one is refilled for every tag.
- str_builder_t tag_text_string;
- str_builder_t tag_tail_string;
- DA_INIT(tag_text_string, 0, alloc);
- DA_INIT(tag_tail_string, 0, alloc);
-
- ib_token_t tag_text = kit_xml_parse_text_(begin, &tag_text_string);
- last = tag_text;
-
- DA_INIT(res.tags, 0, alloc);
-
- for (;;) {
- ib_token_t tagend_open = ib_exact(last, SZ("</"));
- if (tagend_open.status == KIT_OK)
- break; // a closing tag ends this level; it is not consumed here
-
- ib_token_t tag_open = ib_exact(last, SZ("<"));
-
- if (tag_open.status != KIT_OK)
- break;
-
- xml_t tag;
- memset(&tag, 0, sizeof tag);
- // `<?` opens a declaration tag.
- ib_token_t decl_open = ib_exact(tag_open, SZ("?"));
-
- if (decl_open.status == KIT_OK) {
- tag.is_declaration = 1;
- last = decl_open;
- } else
- last = tag_open;
- // Tag name: optional whitespace, then everything up to whitespace, `/` or `>`.
- spaces = ib_any(last, SZ(" \t\r\n"));
- ib_token_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));
-
- DA_INIT(tag.properties, 0, alloc);
-
- last = tag_name;
- // Properties: `name = "value"` pairs, read until no property name is found.
- for (;;) {
- spaces = ib_any(last, SZ(" \t\r\n"));
- ib_token_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));
-
- if (property.status != KIT_OK || property.size == 0)
- break;
-
- spaces = ib_any(property, SZ(" \t\r\n"));
- ib_token_t equals = ib_exact(spaces, SZ("="));
- spaces = ib_any(equals, SZ(" \t\r\n"));
-
- ib_token_t value;
- last = kit_xml_parse_string_(spaces, &value);
-
- if (last.status == KIT_OK) {
- i64 n = tag.properties.size;
- DA_RESIZE(tag.properties, n + 1);
-
- assert(tag.properties.size == n + 1);
- if (tag.properties.size != n + 1) {
- last.status |= KIT_ERROR_OUT_OF_MEMORY;
- DA_DESTROY(tag.properties);
- } else {
- last.status |= kit_xml_alloc_and_unescape_(
- &tag.properties.values[n].name, ib_str(property),
- alloc);
- last.status |= kit_xml_alloc_and_unescape_(
- &tag.properties.values[n].value, ib_str(value), alloc);
- }
- }
- }
- // `spaces` is the whitespace token from the iteration that ended the property loop; the tag must close right after it.
- if (tag.is_declaration) {
- ib_token_t tag_decl_close = ib_exact(spaces, SZ("?>"));
-
- last = tag_decl_close;
-
- DA_INIT(tag.text, 0, alloc);
- DA_INIT(tag.children, 0, alloc);
- } else {
- ib_token_t tag_close = ib_exact(spaces, SZ(">"));
- ib_token_t tag_close_empty = ib_exact(spaces, SZ("/>"));
-
- if (tag_close.status == KIT_OK) {
- kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,
- alloc);
- // The nested level becomes this tag's text and children.
- tag.text = im.text;
- tag.children = im.tags;
- // Expect `</`, the same tag name and `>`, with optional whitespace in between.
- tagend_open = ib_exact(im.last, SZ("</"));
- spaces = ib_any(tagend_open, SZ(" \t\r\n"));
- ib_token_t tagend_name = ib_exact(spaces, ib_str(tag_name));
- spaces = ib_any(tagend_name, SZ(" \t\r\n"));
- ib_token_t tagend_close = ib_exact(spaces, SZ(">"));
-
- last = tagend_close;
-
- } else if (tag_close_empty.status == KIT_OK) {
- last = tag_close_empty;
-
- DA_INIT(tag.text, 0, alloc);
- DA_INIT(tag.children, 0, alloc);
- } else
- last.status |= KIT_PARSING_FAILED;
- }
- // Text that follows this tag, up to the next `<` that is not a comment.
- ib_token_t tag_tail = kit_xml_parse_text_(last, &tag_tail_string);
-
- last = tag_tail;
-
- if (last.status == KIT_OK) {
- i64 n = res.tags.size;
- DA_RESIZE(res.tags, n + 1);
-
- assert(res.tags.size == n + 1);
- if (res.tags.size != n + 1) {
- last.status |= KIT_ERROR_OUT_OF_MEMORY;
- xml_destroy(&tag);
- } else {
- last.status |= kit_xml_alloc_and_unescape_(
- &tag.tag, ib_str(tag_name), alloc);
- last.status |= kit_xml_alloc_and_unescape_(
- &tag.tail, WRAP_STR(tag_tail_string), alloc);
- // The tag is stored even if an unescape above failed; the cleanup after the loop destroys it in that case.
- res.tags.values[n] = tag;
- }
- } else
- xml_destroy(&tag);
- }
- // On failure drop everything collected so far; otherwise unescape the leading text.
- if (last.status != KIT_OK) {
- for (i64 i = 0; i < res.tags.size; i++)
- xml_destroy(res.tags.values + i);
- DA_DESTROY(res.text);
- DA_DESTROY(res.tags);
- } else
- last.status |= kit_xml_alloc_and_unescape_(
- &res.text, WRAP_STR(tag_text_string), alloc);
-
- DA_DESTROY(tag_text_string);
- DA_DESTROY(tag_tail_string);
-
- res.last = last;
- return res;
-}
-
-/* Parses an XML document from the input stream `is`.
- *
- * res.status carries the status of the last parsed token. On success
- * res.xml is either the single top-level tag (when there is exactly one
- * tag and no text before it) or a synthetic node with empty tag, tail and
- * properties whose text and children are the top-level content.
- *
- * All allocations use `alloc`; the input buffer is destroyed before
- * returning.
- */
-kit_xml_parse_result_t kit_xml_parse(is_handle_t      is,
-                                     kit_allocator_t *alloc) {
-  input_buffer_t         ib = ib_init(is, alloc);
-  kit_xml_intermediate_t parsed = kit_xml_parse_buf_(ib_token(&ib),
-                                                     alloc);
-
-  kit_xml_parse_result_t res;
-  memset(&res, 0, sizeof res);
-
-  res.status = parsed.last.status;
-
-  if (res.status == KIT_OK) {
-    if (parsed.text.size == 0 && parsed.tags.size == 1) {
-      // Exactly one root tag: hand it out directly and release the
-      // containers that held it.
-      res.xml = parsed.tags.values[0];
-      DA_DESTROY(parsed.text);
-      DA_DESTROY(parsed.tags);
-    } else {
-      // Wrap the top-level content into an unnamed node.
-      DA_INIT(res.xml.tag, 0, alloc);
-      DA_INIT(res.xml.tail, 0, alloc);
-      DA_INIT(res.xml.properties, 0, alloc);
-
-      res.xml.text     = parsed.text;
-      res.xml.children = parsed.tags;
-    }
-  }
-
-  ib_destroy(&ib);
-  return res;
-}
-
-/* Placeholder for XML serialization.
- *
- * TODO: not implemented. Always returns a zeroed result whose status is
- * KIT_ERROR_NOT_IMPLEMENTED; `alloc` is unused.
- */
-kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
-  (void) alloc;
-
-  assert(xml != NULL);
-
-  kit_xml_text_t out;
-  memset(&out, 0, sizeof out);
-  out.status = KIT_ERROR_NOT_IMPLEMENTED;
-
-  return out;
-}
-
-/* Appends the text of `xml` to *buf: its own text first, then for every
- * child, in order, the child's text (recursively) followed by the child's
- * tail.
- *
- * Returns KIT_OK, or KIT_ERROR_OUT_OF_MEMORY when *buf could not be
- * resized. An error from a child stops the traversal and is returned.
- */
-static s32 kit_xml_append_text_(str_builder_t *buf, xml_t *xml) {
-  assert(buf != NULL);
-  assert(xml != NULL);
-
-  i64 offset = buf->size;
-  i64 wanted = offset + xml->text.size;
-
-  DA_RESIZE(*buf, wanted);
-
-  assert(buf->size == wanted);
-  if (buf->size != wanted)
-    return KIT_ERROR_OUT_OF_MEMORY;
-
-  if (xml->text.size > 0)
-    memcpy(buf->values + offset, xml->text.values, xml->text.size);
-
-  for (i64 i = 0; i < xml->children.size; i++) {
-    xml_t *child = xml->children.values + i;
-
-    s32 status = kit_xml_append_text_(buf, child);
-    if (status != KIT_OK)
-      return status;
-
-    str_t tail = WRAP_STR(child->tail);
-
-    if (tail.size > 0) {
-      offset = buf->size;
-      wanted = offset + tail.size;
-
-      DA_RESIZE(*buf, wanted);
-
-      assert(buf->size == wanted);
-      if (buf->size != wanted)
-        return KIT_ERROR_OUT_OF_MEMORY;
-
-      memcpy(buf->values + offset, tail.values, tail.size);
-    }
-  }
-
-  return KIT_OK;
-}
-
-/* Builds the concatenated text of `xml` and all of its descendants.
- *
- * The text buffer is initialized with `alloc`. The status is
- * KIT_ERROR_INVALID_ARGUMENT when `xml` is NULL, otherwise whatever
- * kit_xml_append_text_ reports.
- */
-kit_xml_text_t kit_xml_full_text(kit_xml_t       *xml,
-                                 kit_allocator_t *alloc) {
-  kit_xml_text_t out;
-  out.status = KIT_OK;
-  DA_INIT(out.text, 0, alloc);
-
-  if (xml == NULL) {
-    out.status = KIT_ERROR_INVALID_ARGUMENT;
-    return out;
-  }
-
-  out.status = kit_xml_append_text_(&out.text, xml);
-  return out;
-}
-
-/* Returns 1 when `xml` has a property whose name equals `name`, and 0
- * otherwise (including when `xml` is NULL).
- */
-b8 kit_xml_has_property(kit_xml_t *xml, kit_str_t name) {
-  assert(xml != NULL);
-  if (xml == NULL)
-    return 0;
-
-  b8 found = 0;
-
-  for (i64 i = 0; i < xml->properties.size && !found; i++) {
-    if (AR_EQUAL(xml->properties.values[i].name, name))
-      found = 1;
-  }
-
-  return found;
-}
-
-/* Returns the value of the first property of `xml` named `name`.
- *
- * The returned string wraps the stored value. A missing property trips an
- * assertion; with assertions disabled an empty string is returned, as it
- * is for a NULL `xml`.
- */
-str_t kit_xml_property(kit_xml_t *xml, str_t name) {
-  assert(xml != NULL);
-  if (xml == NULL)
-    return str(0, NULL);
-
-  i64 found = -1;
-
-  for (i64 i = 0; i < xml->properties.size && found < 0; i++) {
-    if (AR_EQUAL(xml->properties.values[i].name, name))
-      found = i;
-  }
-
-  if (found < 0) {
-    assert(0);
-    return str(0, NULL);
-  }
-
-  return WRAP_STR(xml->properties.values[found].value);
-}
-
-/* Releases everything owned by `xml`: the name and value of every
- * property, all children (recursively), the tag, text and tail strings,
- * and finally the property and children arrays. Does nothing for NULL.
- */
-void kit_xml_destroy(kit_xml_t *xml) {
-  assert(xml != NULL);
-  if (xml == NULL)
-    return;
-
-  i64 idx = 0;
-
-  while (idx < xml->properties.size) {
-    DA_DESTROY(xml->properties.values[idx].name);
-    DA_DESTROY(xml->properties.values[idx].value);
-    idx++;
-  }
-
-  idx = 0;
-
-  while (idx < xml->children.size) {
-    kit_xml_t *child = xml->children.values + idx;
-    kit_xml_destroy(child);
-    idx++;
-  }
-
-  DA_DESTROY(xml->tag);
-  DA_DESTROY(xml->text);
-  DA_DESTROY(xml->tail);
-
-  DA_DESTROY(xml->properties);
-  DA_DESTROY(xml->children);
-}