From df00df5a7a5bcd9076d4423128ea014ab3535626 Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Sun, 11 Feb 2024 18:17:33 +0100 Subject: Update kit --- source/kit/xml.c | 431 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 237 insertions(+), 194 deletions(-) (limited to 'source/kit/xml.c') diff --git a/source/kit/xml.c b/source/kit/xml.c index c62ee82..8d22bf3 100644 --- a/source/kit/xml.c +++ b/source/kit/xml.c @@ -4,198 +4,241 @@ #include typedef struct { - ib_t last; - kit_str_builder_t text; - kit_da_xml_t tags; + ib_token_t last; + str_builder_t text; + kit_da_xml_t tags; } kit_xml_intermediate_t; -static kit_status_t kit_xml_unescape_(str_builder_t *str) { - assert(str != NULL); +static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str, + kit_allocator_t *alloc) { + assert(dst != NULL); + assert(str.size == 0 || str.values != NULL); - str_builder_t buf; - DA_INIT(buf, str->size, str->alloc); - buf.size = 0; + if (dst == NULL) + return KIT_ERROR_INTERNAL; + if (str.size != 0 && str.values == NULL) + return KIT_ERROR_INTERNAL; - for (i64 i = 0; i < str->size; i++) - if (str->values[i] != '&') - buf.values[buf.size++] = str->values[i]; + DA_INIT(*dst, str.size, alloc); + + if (dst->size != str.size) + return KIT_ERROR_BAD_ALLOC; + + dst->size = 0; + + for (i64 i = 0; i < str.size; i++) + if (str.values[i] != '&') + dst->values[dst->size++] = str.values[i]; else { i64 n = 1; - while (i + n < str->size && str->values[i + n] != ';') n++; - if (i + n >= str->size) { - DA_DESTROY(buf); - return KIT_ERROR_INTERNAL; + while (i + n < str.size && str.values[i + n] != ';') n++; + if (i + n >= str.size) { + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; } - if (n == 3 && memcmp(str->values + i, "&lt;", 4) == 0) - buf.values[buf.size++] = '<'; - else if (n == 3 && memcmp(str->values + i, "&gt;", 4) == 0) - buf.values[buf.size++] = '>'; - else if (n == 4 && memcmp(str->values + i, "&amp;", 5) == 0) - buf.values[buf.size++] = '&'; - else if (n == 
5 && memcmp(str->values + i, "&quot;", 6) == 0) - buf.values[buf.size++] = '"'; - else if (n == 5 && memcmp(str->values + i, "&apos;", 6) == 0) - buf.values[buf.size++] = '\''; - else { - DA_DESTROY(buf); - return KIT_ERROR_INTERNAL; + if (n == 3 && memcmp(str.values + i, "&lt;", 4) == 0) + dst->values[dst->size++] = '<'; + else if (n == 3 && memcmp(str.values + i, "&gt;", 4) == 0) + dst->values[dst->size++] = '>'; + else if (n == 4 && memcmp(str.values + i, "&amp;", 5) == 0) + dst->values[dst->size++] = '&'; + else if (n == 5 && memcmp(str.values + i, "&quot;", 6) == 0) + dst->values[dst->size++] = '"'; + else if (n == 5 && memcmp(str.values + i, "&apos;", 6) == 0) + dst->values[dst->size++] = '\''; + else if (n - 2 <= 8 && str.values[i + 1] == '#' && + str.values[i + 2] == 'x') { + // hex encoding + // + + c8 buf[8]; + u64 x = 0; + + memcpy(buf, str.values + (i + 3), n - 2); + + for (i64 k = 0; k < n - 2; k++) { + c8 c = str.values[i + 3 + k]; + x <<= 8; + if (c >= '0' && c <= '9') + x |= (c - '0'); + else if (c >= 'a' && c <= 'f') + x |= 10 + (c - 'a'); + else if (c >= 'A' && c <= 'F') + x |= 10 + (c - 'A'); + else { + x = 0; + break; + } + } + + if (x == 0 || x > 255u) { + // TODO + // UTF-8 encoding + + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; + } + + dst->values[dst->size++] = (c8) x; + } else if (n - 1 <= 20 && str.values[i + 1] == '#') { + // dec encoding + // + + c8 buf[20]; + u64 x = 0; + + memcpy(buf, str.values + (i + 2), n - 2); + + for (i64 k = 0; k < n - 1; k++) { + c8 c = str.values[i + 2 + k]; + x *= 10; + if (c >= '0' && c <= '9') + x += (c - '0'); + else { + x = 0; + break; + } + } + + if (x == 0 || x > 255u) { + // TODO + // UTF-8 encoding + + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; + } + + dst->values[dst->size++] = (c8) x; + } else { + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; } i += n; } - DA_DESTROY(*str); - *str = buf; - return KIT_OK; } -static ib_t kit_xml_parse_text_(ib_t begin) { - ib_t text = ib_until(begin, SZ("<")); - ib_t last = ib_copy(text); 
+static ib_token_t kit_xml_parse_text_(ib_token_t begin, + str_builder_t *dst) { + ib_token_t last = ib_until(begin, SZ("<")); + + DA_RESIZE(*dst, last.size); + + assert(dst->size == last.size); + if (dst->size != last.size) + last.status |= KIT_ERROR_BAD_ALLOC; + else if (last.size > 0) + memcpy(dst->values, ib_str(last).values, last.size); for (;;) { - ib_t comment_open = ib_exact(last, SZ("")); - ib_t comment_close = ib_exact(comment_text, SZ("-->")); - ib_t next_text = ib_until(comment_close, SZ("<")); + ib_token_t comment_text = ib_until(comment_open, SZ("-->")); + ib_token_t comment_close = ib_exact(comment_text, SZ("-->")); + ib_token_t next_text = ib_until(comment_close, SZ("<")); - if (next_text.status == KIT_OK && next_text.data.size > 0) { - i64 n = text.data.size; - DA_RESIZE(text.data, n + next_text.data.size); + if (next_text.status == KIT_OK && next_text.size > 0) { + i64 n = dst->size; + DA_RESIZE(*dst, n + next_text.size); - assert(text.data.size == n + next_text.data.size); - if (text.data.size != n + next_text.data.size) - next_text.status = KIT_ERROR_BAD_ALLOC; + assert(dst->size == n + next_text.size); + if (dst->size != n + next_text.size) + next_text.status |= KIT_ERROR_BAD_ALLOC; else - memcpy(text.data.values + n, next_text.data.values, - next_text.data.size); + memcpy(dst->values + n, ib_str(next_text).values, + ib_str(next_text).size); } - ib_destroy(last); - last = ib_copy(next_text); - - ib_destroy(comment_open); - ib_destroy(comment_text); - ib_destroy(comment_close); - ib_destroy(next_text); + last = next_text; } - // move - DA_DESTROY(last.data); - last.data = text.data; - memset(&text.data, 0, sizeof text.data); - - kit_status_t s = kit_xml_unescape_(&last.data); - if (s != KIT_OK) - last.status = s; - - ib_destroy(text); - return last; } -static ib_t kit_xml_parse_string_(ib_t begin) { - ib_t quotes_open = ib_exact(begin, SZ("\"")); - ib_t apostr_open = ib_exact(begin, SZ("'")); - - ib_t open = quotes_open.status == KIT_OK ? 
quotes_open - : apostr_open; - - ib_t text = ib_until(open, WRAP_STR(open.data)); - ib_t close = ib_exact(text, WRAP_STR(open.data)); +static ib_token_t kit_xml_parse_string_(ib_token_t begin, + ib_token_t *value) { + assert(value != NULL); + if (value == NULL) { + begin.status |= KIT_ERROR_INTERNAL; + return begin; + } - // move - DA_DESTROY(close.data); - close.data = text.data; - memset(&text.data, 0, sizeof text.data); + ib_token_t quotes_open = ib_exact(begin, SZ("\"")); + ib_token_t apostr_open = ib_exact(begin, SZ("'")); - kit_status_t s = kit_xml_unescape_(&close.data); - if (s == KIT_OK) - close.status = s; + ib_token_t open = quotes_open.status == KIT_OK ? quotes_open + : apostr_open; - ib_destroy(quotes_open); - ib_destroy(apostr_open); - ib_destroy(text); + *value = ib_until(open, ib_str(open)); + ib_token_t close = ib_exact(*value, ib_str(open)); return close; } static kit_xml_intermediate_t kit_xml_parse_buf_( - ib_t begin, kit_allocator_t *alloc) { + ib_token_t begin, kit_allocator_t *alloc) { kit_xml_intermediate_t res; memset(&res, 0, sizeof res); - ib_t last, spaces; + ib_token_t last, spaces; memset(&last, 0, sizeof last); memset(&spaces, 0, sizeof spaces); - ib_t tag_text = kit_xml_parse_text_(begin); - last = ib_copy(tag_text); + str_builder_t tag_text_string; + str_builder_t tag_tail_string; + DA_INIT(tag_text_string, 0, alloc); + DA_INIT(tag_tail_string, 0, alloc); + + ib_token_t tag_text = kit_xml_parse_text_(begin, &tag_text_string); + last = tag_text; DA_INIT(res.tags, 0, alloc); for (;;) { - ib_t tagend_open = ib_exact(last, SZ("")); - ib_destroy(spaces); + spaces = ib_any(last, SZ(" \t\r\n")); + ib_token_t tag_name = ib_none(spaces, SZ(" \t\r\n/>")); DA_INIT(tag.properties, 0, alloc); - ib_destroy(last); - last = ib_copy(tag_name); + last = tag_name; for (;;) { - spaces = ib_any(last, SZ(" \t\r\n")); - ib_t property = ib_none(spaces, SZ(" \t\r\n=?/>")); - ib_destroy(spaces); + spaces = ib_any(last, SZ(" \t\r\n")); + ib_token_t property = 
ib_none(spaces, SZ(" \t\r\n=?/>")); - if (property.status != KIT_OK || property.data.size == 0) { - ib_destroy(property); + if (property.status != KIT_OK || property.size == 0) break; - } - spaces = ib_any(property, SZ(" \t\r\n")); - ib_t equals = ib_exact(spaces, SZ("=")); - ib_destroy(spaces); - spaces = ib_any(equals, SZ(" \t\r\n")); - ib_t value = kit_xml_parse_string_(spaces); - ib_destroy(spaces); + spaces = ib_any(property, SZ(" \t\r\n")); + ib_token_t equals = ib_exact(spaces, SZ("=")); + spaces = ib_any(equals, SZ(" \t\r\n")); - ib_destroy(last); - last = ib_copy(value); + ib_token_t value; + last = kit_xml_parse_string_(spaces, &value); if (last.status == KIT_OK) { i64 n = tag.properties.size; @@ -203,77 +246,56 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( assert(tag.properties.size == n + 1); if (tag.properties.size != n + 1) { - last.status = KIT_ERROR_BAD_ALLOC; + last.status |= KIT_ERROR_BAD_ALLOC; DA_DESTROY(tag.properties); } else { - // move - tag.properties.values[n].name = property.data; - memset(&property.data, 0, sizeof property.data); - - // move - tag.properties.values[n].value = value.data; - memset(&value.data, 0, sizeof value.data); + last.status |= kit_xml_alloc_and_unescape_( + &tag.properties.values[n].name, ib_str(property), + alloc); + last.status |= kit_xml_alloc_and_unescape_( + &tag.properties.values[n].value, ib_str(value), alloc); } } - - ib_destroy(property); - ib_destroy(equals); - ib_destroy(value); } - spaces = ib_any(last, SZ(" \t\r\n")); - if (tag.is_declaration) { - ib_t tag_decl_close = ib_exact(spaces, SZ("?>")); - ib_destroy(spaces); + ib_token_t tag_decl_close = ib_exact(spaces, SZ("?>")); - ib_destroy(last); last = tag_decl_close; DA_INIT(tag.text, 0, alloc); DA_INIT(tag.children, 0, alloc); } else { - ib_t tag_close = ib_exact(spaces, SZ(">")); - ib_t tag_close_empty = ib_exact(spaces, SZ("/>")); - ib_destroy(spaces); + ib_token_t tag_close = ib_exact(spaces, SZ(">")); + ib_token_t tag_close_empty = 
ib_exact(spaces, SZ("/>")); if (tag_close.status == KIT_OK) { kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close, alloc); - tag.text = im.text; - tag.children = im.tags; - - tagend_open = ib_exact(im.last, SZ("")); - ib_destroy(spaces); - ib_destroy(tagend_open); - ib_destroy(tagend_name); - - ib_destroy(last); + + tag.text = im.text; + tag.children = im.tags; + + tagend_open = ib_exact(im.last, SZ("")); + last = tagend_close; } else if (tag_close_empty.status == KIT_OK) { - ib_destroy(last); - last = ib_copy(tag_close_empty); + last = tag_close_empty; DA_INIT(tag.text, 0, alloc); DA_INIT(tag.children, 0, alloc); } else - last.status = KIT_ERROR_INTERNAL; - - ib_destroy(tag_close); - ib_destroy(tag_close_empty); + last.status |= KIT_PARSING_FAILED; } - ib_t tag_tail = kit_xml_parse_text_(last); + ib_token_t tag_tail = kit_xml_parse_text_(last, &tag_tail_string); - ib_destroy(last); - last = ib_copy(tag_tail); + last = tag_tail; if (last.status == KIT_OK) { i64 n = res.tags.size; @@ -281,25 +303,18 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( assert(res.tags.size == n + 1); if (res.tags.size != n + 1) { - last.status = KIT_ERROR_BAD_ALLOC; + last.status |= KIT_ERROR_BAD_ALLOC; xml_destroy(&tag); } else { - // move - tag.tag = tag_name.data; - memset(&tag_name.data, 0, sizeof tag_name.data); - - // move - tag.tail = tag_tail.data; - memset(&tag_tail.data, 0, sizeof tag_tail.data); + last.status |= kit_xml_alloc_and_unescape_( + &tag.tag, ib_str(tag_name), alloc); + last.status |= kit_xml_alloc_and_unescape_( + &tag.tail, WRAP_STR(tag_tail_string), alloc); res.tags.values[n] = tag; } } else xml_destroy(&tag); - - ib_destroy(tag_open); - ib_destroy(tag_name); - ib_destroy(tag_tail); } if (last.status != KIT_OK) { @@ -307,37 +322,38 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( xml_destroy(res.tags.values + i); DA_DESTROY(res.text); DA_DESTROY(res.tags); - } else { - // move - res.text = tag_text.data; - memset(&tag_text.data, 0, sizeof 
tag_text.data); - } + } else + last.status |= kit_xml_alloc_and_unescape_( + &res.text, WRAP_STR(tag_text_string), alloc); - ib_destroy(tag_text); + DA_DESTROY(tag_text_string); + DA_DESTROY(tag_tail_string); res.last = last; return res; } -kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, +kit_xml_parse_result_t kit_xml_parse(is_handle_t is, kit_allocator_t *alloc) { - ib_t ib = ib_wrap(is, alloc); - kit_xml_intermediate_t im = kit_xml_parse_buf_(ib, alloc); - ib_destroy(ib); + input_buffer_t ib = ib_wrap(is, alloc); + kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_token(&ib), + alloc); kit_xml_parse_result_t res; memset(&res, 0, sizeof res); res.status = im.last.status; - ib_destroy(im.last); - if (res.status != KIT_OK) + if (res.status != KIT_OK) { + ib_destroy(&ib); return res; + } if (im.text.size == 0 && im.tags.size == 1) { res.xml = im.tags.values[0]; DA_DESTROY(im.text); DA_DESTROY(im.tags); + ib_destroy(&ib); return res; } @@ -348,10 +364,14 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, res.xml.text = im.text; res.xml.children = im.tags; + ib_destroy(&ib); return res; } kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) { + // TODO + // + assert(xml != NULL); xml_text_t result; @@ -361,8 +381,7 @@ kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) { return result; } -static kit_status_t kit_xml_append_text_(str_builder_t *buf, - xml_t *xml) { +static s32 kit_xml_append_text_(str_builder_t *buf, xml_t *xml) { assert(buf != NULL); assert(xml != NULL); @@ -377,8 +396,7 @@ static kit_status_t kit_xml_append_text_(str_builder_t *buf, memcpy(buf->values + n, xml->text.values, xml->text.size); for (i64 i = 0; i < xml->children.size; i++) { - kit_status_t s = kit_xml_append_text_(buf, - xml->children.values + i); + s32 s = kit_xml_append_text_(buf, xml->children.values + i); if (s != KIT_OK) return s; @@ -415,6 +433,31 @@ kit_xml_text_t kit_xml_full_text(kit_xml_t *xml, return res; } +b8 
kit_xml_has_property(kit_xml_t *xml, kit_str_t name) { + assert(xml != NULL); + if (xml == NULL) + return 0; + + for (i64 i = 0; i < xml->properties.size; i++) + if (AR_EQUAL(xml->properties.values[i].name, name)) + return 1; + + return 0; +} + +str_t kit_xml_property(kit_xml_t *xml, str_t name) { + assert(xml != NULL); + if (xml == NULL) + return str(0, NULL); + + for (i64 i = 0; i < xml->properties.size; i++) + if (AR_EQUAL(xml->properties.values[i].name, name)) + return WRAP_STR(xml->properties.values[i].value); + + assert(0); + return str(0, NULL); +} + void kit_xml_destroy(kit_xml_t *xml) { assert(xml != NULL); if (xml == NULL) -- cgit v1.2.3