diff options
Diffstat (limited to 'kit/xml.c')
-rw-r--r-- | kit/xml.c | 483 |
1 files changed, 0 insertions, 483 deletions
diff --git a/kit/xml.c b/kit/xml.c deleted file mode 100644 index f61bfc1..0000000 --- a/kit/xml.c +++ /dev/null @@ -1,483 +0,0 @@ -#include "xml.h" - -#include "input_buffer.h" -#include <assert.h> - -typedef struct { - ib_token_t last; - str_builder_t text; - kit_da_xml_t tags; -} kit_xml_intermediate_t; - -static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str, - kit_allocator_t *alloc) { - assert(dst != NULL); - assert(str.size == 0 || str.values != NULL); - - if (dst == NULL) - return KIT_ERROR_INTERNAL; - if (str.size != 0 && str.values == NULL) - return KIT_ERROR_INTERNAL; - - DA_INIT(*dst, str.size, alloc); - - if (dst->size != str.size) - return KIT_ERROR_OUT_OF_MEMORY; - - dst->size = 0; - - for (i64 i = 0; i < str.size; i++) - if (str.values[i] != '&') - dst->values[dst->size++] = str.values[i]; - else { - i64 n = 1; - while (i + n < str.size && str.values[i + n] != ';') - n++; - if (i + n >= str.size) { - DA_DESTROY(*dst); - return KIT_PARSING_FAILED; - } - if (n == 3 && memcmp(str.values + i, "<", 4) == 0) - dst->values[dst->size++] = '<'; - else if (n == 3 && memcmp(str.values + i, ">", 4) == 0) - dst->values[dst->size++] = '>'; - else if (n == 4 && memcmp(str.values + i, "&", 5) == 0) - dst->values[dst->size++] = '&'; - else if (n == 5 && memcmp(str.values + i, """, 6) == 0) - dst->values[dst->size++] = '"'; - else if (n == 5 && memcmp(str.values + i, "'", 6) == 0) - dst->values[dst->size++] = '\''; - else if (n - 2 <= 8 && str.values[i + 1] == '#' && - str.values[i + 2] == 'x') { - // hex encoding - // - - c8 buf[8]; - u64 x = 0; - - memcpy(buf, str.values + (i + 3), n - 2); - - for (i64 k = 0; k < n - 2; k++) { - c8 c = str.values[i + 3 + k]; - x <<= 8; - if (c >= '0' && c <= '9') - x |= (c - '0'); - else if (c >= 'a' && c <= 'f') - x |= 10 + (c - 'a'); - else if (c >= 'A' && c <= 'F') - x |= 10 + (c - 'A'); - else { - x = 0; - break; - } - } - - if (x == 0 || x > 255u) { - // TODO - // UTF-8 encoding - - DA_DESTROY(*dst); - return KIT_PARSING_FAILED; - } - - dst->values[dst->size++] = (c8) x; - } else if (n - 1 <= 20 && str.values[i + 1] == '#') { - // dec encoding - // - - c8 buf[20]; - u64 x = 0; - - memcpy(buf, str.values + (i + 2), n - 2); - - for (i64 k = 0; k < n - 1; k++) { - c8 c = str.values[i + 2 + k]; - x *= 10; - if (c >= '0' && c <= '9') - x += (c - '0'); - else { - x = 0; - break; - } - } - - if (x == 0 || x > 255u) { - // TODO - // UTF-8 encoding - - DA_DESTROY(*dst); - return KIT_PARSING_FAILED; - } - - dst->values[dst->size++] = (c8) x; - } else { - DA_DESTROY(*dst); - return KIT_PARSING_FAILED; - } - i += n; - } - - return KIT_OK; -} - -static ib_token_t kit_xml_parse_text_(ib_token_t begin, - str_builder_t *dst) { - ib_token_t last = ib_until(begin, SZ("<")); - - DA_RESIZE(*dst, last.size); - - assert(dst->size == last.size); - if (dst->size != last.size) - last.status |= KIT_ERROR_OUT_OF_MEMORY; - else if (last.size > 0) - memcpy(dst->values, ib_str(last).values, last.size); - - for (;;) { - ib_token_t comment_open = ib_exact(last, SZ("<!--")); - - if (comment_open.status != KIT_OK) - break; - - ib_token_t comment_text = ib_until(comment_open, SZ("-->")); - ib_token_t comment_close = ib_exact(comment_text, SZ("-->")); - ib_token_t next_text = ib_until(comment_close, SZ("<")); - - if (next_text.status == KIT_OK && next_text.size > 0) { - i64 n = dst->size; - DA_RESIZE(*dst, n + next_text.size); - - assert(dst->size == n + next_text.size); - if (dst->size != n + next_text.size) - next_text.status |= KIT_ERROR_OUT_OF_MEMORY; - else - memcpy(dst->values + n, ib_str(next_text).values, - ib_str(next_text).size); - } - - last = next_text; - } - - return last; -} - -static ib_token_t kit_xml_parse_string_(ib_token_t begin, - ib_token_t *value) { - assert(value != NULL); - if (value == NULL) { - begin.status |= KIT_ERROR_INTERNAL; - return begin; - } - - ib_token_t quotes_open = ib_exact(begin, SZ("\"")); - ib_token_t apostr_open = ib_exact(begin, SZ("'")); - - ib_token_t open = quotes_open.status == KIT_OK ? quotes_open - : apostr_open; - - *value = ib_until(open, ib_str(open)); - ib_token_t close = ib_exact(*value, ib_str(open)); - - return close; -} - -static kit_xml_intermediate_t kit_xml_parse_buf_( - ib_token_t begin, kit_allocator_t *alloc) { - kit_xml_intermediate_t res; - memset(&res, 0, sizeof res); - - ib_token_t last, spaces; - memset(&last, 0, sizeof last); - memset(&spaces, 0, sizeof spaces); - - str_builder_t tag_text_string; - str_builder_t tag_tail_string; - DA_INIT(tag_text_string, 0, alloc); - DA_INIT(tag_tail_string, 0, alloc); - - ib_token_t tag_text = kit_xml_parse_text_(begin, &tag_text_string); - last = tag_text; - - DA_INIT(res.tags, 0, alloc); - - for (;;) { - ib_token_t tagend_open = ib_exact(last, SZ("</")); - if (tagend_open.status == KIT_OK) - break; - - ib_token_t tag_open = ib_exact(last, SZ("<")); - - if (tag_open.status != KIT_OK) - break; - - xml_t tag; - memset(&tag, 0, sizeof tag); - - ib_token_t decl_open = ib_exact(tag_open, SZ("?")); - - if (decl_open.status == KIT_OK) { - tag.is_declaration = 1; - last = decl_open; - } else - last = tag_open; - - spaces = ib_any(last, SZ(" \t\r\n")); - ib_token_t tag_name = ib_none(spaces, SZ(" \t\r\n/>")); - - DA_INIT(tag.properties, 0, alloc); - - last = tag_name; - - for (;;) { - spaces = ib_any(last, SZ(" \t\r\n")); - ib_token_t property = ib_none(spaces, SZ(" \t\r\n=?/>")); - - if (property.status != KIT_OK || property.size == 0) - break; - - spaces = ib_any(property, SZ(" \t\r\n")); - ib_token_t equals = ib_exact(spaces, SZ("=")); - spaces = ib_any(equals, SZ(" \t\r\n")); - - ib_token_t value; - last = kit_xml_parse_string_(spaces, &value); - - if (last.status == KIT_OK) { - i64 n = tag.properties.size; - DA_RESIZE(tag.properties, n + 1); - - assert(tag.properties.size == n + 1); - if (tag.properties.size != n + 1) { - last.status |= KIT_ERROR_OUT_OF_MEMORY; - DA_DESTROY(tag.properties); - } else { - last.status |= kit_xml_alloc_and_unescape_( - &tag.properties.values[n].name, ib_str(property), - alloc); - last.status |= kit_xml_alloc_and_unescape_( - &tag.properties.values[n].value, ib_str(value), alloc); - } - } - } - - if (tag.is_declaration) { - ib_token_t tag_decl_close = ib_exact(spaces, SZ("?>")); - - last = tag_decl_close; - - DA_INIT(tag.text, 0, alloc); - DA_INIT(tag.children, 0, alloc); - } else { - ib_token_t tag_close = ib_exact(spaces, SZ(">")); - ib_token_t tag_close_empty = ib_exact(spaces, SZ("/>")); - - if (tag_close.status == KIT_OK) { - kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close, - alloc); - - tag.text = im.text; - tag.children = im.tags; - - tagend_open = ib_exact(im.last, SZ("</")); - spaces = ib_any(tagend_open, SZ(" \t\r\n")); - ib_token_t tagend_name = ib_exact(spaces, ib_str(tag_name)); - spaces = ib_any(tagend_name, SZ(" \t\r\n")); - ib_token_t tagend_close = ib_exact(spaces, SZ(">")); - - last = tagend_close; - - } else if (tag_close_empty.status == KIT_OK) { - last = tag_close_empty; - - DA_INIT(tag.text, 0, alloc); - DA_INIT(tag.children, 0, alloc); - } else - last.status |= KIT_PARSING_FAILED; - } - - ib_token_t tag_tail = kit_xml_parse_text_(last, &tag_tail_string); - - last = tag_tail; - - if (last.status == KIT_OK) { - i64 n = res.tags.size; - DA_RESIZE(res.tags, n + 1); - - assert(res.tags.size == n + 1); - if (res.tags.size != n + 1) { - last.status |= KIT_ERROR_OUT_OF_MEMORY; - xml_destroy(&tag); - } else { - last.status |= kit_xml_alloc_and_unescape_( - &tag.tag, ib_str(tag_name), alloc); - last.status |= kit_xml_alloc_and_unescape_( - &tag.tail, WRAP_STR(tag_tail_string), alloc); - - res.tags.values[n] = tag; - } - } else - xml_destroy(&tag); - } - - if (last.status != KIT_OK) { - for (i64 i = 0; i < res.tags.size; i++) - xml_destroy(res.tags.values + i); - DA_DESTROY(res.text); - DA_DESTROY(res.tags); - } else - last.status |= kit_xml_alloc_and_unescape_( - &res.text, WRAP_STR(tag_text_string), alloc); - - DA_DESTROY(tag_text_string); - DA_DESTROY(tag_tail_string); - - res.last = last; - return res; -} - -kit_xml_parse_result_t kit_xml_parse(is_handle_t is, - kit_allocator_t *alloc) { - input_buffer_t ib = ib_init(is, alloc); - kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_token(&ib), - alloc); - - kit_xml_parse_result_t res; - memset(&res, 0, sizeof res); - - res.status = im.last.status; - - if (res.status != KIT_OK) { - ib_destroy(&ib); - return res; - } - - if (im.text.size == 0 && im.tags.size == 1) { - res.xml = im.tags.values[0]; - DA_DESTROY(im.text); - DA_DESTROY(im.tags); - ib_destroy(&ib); - return res; - } - - DA_INIT(res.xml.tag, 0, alloc); - DA_INIT(res.xml.tail, 0, alloc); - DA_INIT(res.xml.properties, 0, alloc); - - res.xml.text = im.text; - res.xml.children = im.tags; - - ib_destroy(&ib); - return res; -} - -kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) { - // TODO - // - - assert(xml != NULL); - - xml_text_t result; - memset(&result, 0, sizeof result); - - (void) alloc; - - result.status = KIT_ERROR_NOT_IMPLEMENTED; - return result; -} - -static s32 kit_xml_append_text_(str_builder_t *buf, xml_t *xml) { - assert(buf != NULL); - assert(xml != NULL); - - i64 n = buf->size; - DA_RESIZE(*buf, n + xml->text.size); - - assert(buf->size == n + xml->text.size); - if (buf->size != n + xml->text.size) - return KIT_ERROR_OUT_OF_MEMORY; - - if (xml->text.size > 0) - memcpy(buf->values + n, xml->text.values, xml->text.size); - - for (i64 i = 0; i < xml->children.size; i++) { - s32 s = kit_xml_append_text_(buf, xml->children.values + i); - if (s != KIT_OK) - return s; - - str_t tail = WRAP_STR(xml->children.values[i].tail); - - if (tail.size <= 0) - continue; - - n = buf->size; - DA_RESIZE(*buf, n + tail.size); - - assert(buf->size == n + tail.size); - if (buf->size != n + tail.size) - return KIT_ERROR_OUT_OF_MEMORY; - - if (tail.size > 0) - memcpy(buf->values + n, tail.values, tail.size); - } - - return KIT_OK; -} - -kit_xml_text_t kit_xml_full_text(kit_xml_t *xml, - kit_allocator_t *alloc) { - kit_xml_text_t res; - res.status = KIT_OK; - DA_INIT(res.text, 0, alloc); - - if (xml != NULL) - res.status = kit_xml_append_text_(&res.text, xml); - else - res.status = KIT_ERROR_INVALID_ARGUMENT; - - return res; -} - -b8 kit_xml_has_property(kit_xml_t *xml, kit_str_t name) { - assert(xml != NULL); - if (xml == NULL) - return 0; - - for (i64 i = 0; i < xml->properties.size; i++) - if (AR_EQUAL(xml->properties.values[i].name, name)) - return 1; - - return 0; -} - -str_t kit_xml_property(kit_xml_t *xml, str_t name) { - assert(xml != NULL); - if (xml == NULL) - return str(0, NULL); - - for (i64 i = 0; i < xml->properties.size; i++) - if (AR_EQUAL(xml->properties.values[i].name, name)) - return WRAP_STR(xml->properties.values[i].value); - - assert(0); - return str(0, NULL); -} - -void kit_xml_destroy(kit_xml_t *xml) { - assert(xml != NULL); - if (xml == NULL) - return; - - for (i64 i = 0; i < xml->properties.size; i++) { - DA_DESTROY(xml->properties.values[i].name); - DA_DESTROY(xml->properties.values[i].value); - } - - for (i64 i = 0; i < xml->children.size; i++) - kit_xml_destroy(xml->children.values + i); - - DA_DESTROY(xml->tag); - DA_DESTROY(xml->text); - DA_DESTROY(xml->tail); - - DA_DESTROY(xml->properties); - DA_DESTROY(xml->children); -} |