summary | refs | log | tree | commit | diff
path: root/source
diff options
context:
space:
mode:
author: Mitya Selivanov <automainint@guattari.tech> 2023-09-22 16:50:23 +0200
committer: Mitya Selivanov <automainint@guattari.tech> 2023-09-22 16:50:23 +0200
commit: bbb397d327f84be61f90cb2744c3f29d395857fd (patch)
tree: 5fef4e7b6189152e361465ddbe55c04fc52217b6 /source
parent: 4dc8d307ab16abea09059490b00fadc6d3157985 (diff)
download: kit-bbb397d327f84be61f90cb2744c3f29d395857fd.zip
xml: comments
Diffstat (limited to 'source')
-rw-r--r-- source/kit/xml.c 149
-rw-r--r-- source/kit/xml.h 1
-rw-r--r-- source/tests/xml.test.c 157
3 files changed, 277 insertions, 30 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c
index ed11b18..d815e9b 100644
--- a/source/kit/xml.c
+++ b/source/kit/xml.c
@@ -21,6 +21,38 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_t tag_text = ib_until(begin, SZ("<"));
last = ib_copy(tag_text);
+ for (;;) {
+ ib_t comment_open = ib_exact(last, SZ("<!--"));
+
+ if (comment_open.status != KIT_OK) {
+ ib_destroy(comment_open);
+ break;
+ }
+
+ ib_t comment_text = ib_until(comment_open, SZ("-->"));
+ ib_t comment_close = ib_exact(comment_text, SZ("-->"));
+ ib_t next_text = ib_until(comment_close, SZ("<"));
+
+ if (next_text.status == KIT_OK && next_text.data.size > 0) {
+ i64 n = tag_text.data.size;
+ DA_RESIZE(tag_text.data, n + next_text.data.size);
+
+ if (tag_text.data.size != n + next_text.data.size)
+ next_text.status = KIT_ERROR_BAD_ALLOC;
+ else
+ memcpy(tag_text.data.values + n, next_text.data.values,
+ next_text.data.size);
+ }
+
+ ib_destroy(last);
+ last = ib_copy(next_text);
+
+ ib_destroy(comment_open);
+ ib_destroy(comment_text);
+ ib_destroy(comment_close);
+ ib_destroy(next_text);
+ }
+
DA_INIT(res.tags, 0, alloc);
for (;;) {
@@ -39,7 +71,19 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
xml_t tag;
memset(&tag, 0, sizeof tag);
- spaces = ib_any(tag_open, SZ(" \t\r\n"));
+ ib_t decl_open = ib_exact(tag_open, SZ("?"));
+
+ ib_destroy(last);
+
+ if (decl_open.status == KIT_OK) {
+ tag.is_declaration = 1;
+ last = ib_copy(decl_open);
+ } else
+ last = ib_copy(tag_open);
+
+ ib_destroy(decl_open);
+
+ spaces = ib_any(last, SZ(" \t\r\n"));
ib_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));
ib_destroy(spaces);
@@ -50,7 +94,7 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
for (;;) {
spaces = ib_any(last, SZ(" \t\r\n"));
- ib_t property = ib_none(spaces, SZ(" \t\r\n=/>"));
+ ib_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));
ib_destroy(spaces);
if (property.status != KIT_OK || property.data.size == 0) {
@@ -94,45 +138,92 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_destroy(value_text);
}
- spaces = ib_any(last, SZ(" \t\r\n"));
- ib_t tag_close = ib_exact(spaces, SZ(">"));
- ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));
- ib_destroy(spaces);
-
- if (tag_close.status == KIT_OK) {
- kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,
- alloc);
- tag.text = im.text;
- tag.children = im.tags;
+ spaces = ib_any(last, SZ(" \t\r\n"));
- tagend_open = ib_exact(im.last, SZ("</"));
- ib_destroy(im.last);
- spaces = ib_any(tagend_open, SZ(" \t\r\n"));
- ib_t tagend_name = ib_exact(spaces, WRAP_STR(tag_name.data));
+ if (tag.is_declaration) {
+ ib_t tag_decl_close = ib_exact(spaces, SZ("?>"));
ib_destroy(spaces);
- spaces = ib_any(tagend_name, SZ(" \t\r\n"));
- ib_t tagend_close = ib_exact(spaces, SZ(">"));
- ib_destroy(spaces);
- ib_destroy(tagend_open);
- ib_destroy(tagend_name);
-
- ib_destroy(last);
- last = tagend_close;
- } else if (tag_close_empty.status == KIT_OK) {
ib_destroy(last);
- last = ib_copy(tag_close_empty);
+ last = tag_decl_close;
DA_INIT(tag.text, 0, alloc);
DA_INIT(tag.children, 0, alloc);
- } else
- last.status = KIT_ERROR_INTERNAL;
+ } else {
+ ib_t tag_close = ib_exact(spaces, SZ(">"));
+ ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));
+ ib_destroy(spaces);
+
+ if (tag_close.status == KIT_OK) {
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,
+ alloc);
+ tag.text = im.text;
+ tag.children = im.tags;
+
+ tagend_open = ib_exact(im.last, SZ("</"));
+ ib_destroy(im.last);
+ spaces = ib_any(tagend_open, SZ(" \t\r\n"));
+ ib_t tagend_name = ib_exact(spaces, WRAP_STR(tag_name.data));
+ ib_destroy(spaces);
+ spaces = ib_any(tagend_name, SZ(" \t\r\n"));
+ ib_t tagend_close = ib_exact(spaces, SZ(">"));
+ ib_destroy(spaces);
+ ib_destroy(tagend_open);
+ ib_destroy(tagend_name);
+
+ ib_destroy(last);
+ last = tagend_close;
+
+ } else if (tag_close_empty.status == KIT_OK) {
+ ib_destroy(last);
+ last = ib_copy(tag_close_empty);
+
+ DA_INIT(tag.text, 0, alloc);
+ DA_INIT(tag.children, 0, alloc);
+ } else
+ last.status = KIT_ERROR_INTERNAL;
+
+ ib_destroy(tag_close);
+ ib_destroy(tag_close_empty);
+ }
ib_t tag_tail = ib_until(last, SZ("<"));
ib_destroy(last);
last = ib_copy(tag_tail);
+ for (;;) {
+ ib_t comment_open = ib_exact(last, SZ("<!--"));
+
+ if (comment_open.status != KIT_OK) {
+ ib_destroy(comment_open);
+ break;
+ }
+
+ ib_t comment_text = ib_until(comment_open, SZ("-->"));
+ ib_t comment_close = ib_exact(comment_text, SZ("-->"));
+ ib_t next_text = ib_until(comment_close, SZ("<"));
+
+ if (next_text.status == KIT_OK && next_text.data.size > 0) {
+ i64 n = tag_tail.data.size;
+ DA_RESIZE(tag_tail.data, n + next_text.data.size);
+
+ if (tag_tail.data.size != n + next_text.data.size)
+ next_text.status = KIT_ERROR_BAD_ALLOC;
+ else
+ memcpy(tag_tail.data.values + n, next_text.data.values,
+ next_text.data.size);
+ }
+
+ ib_destroy(last);
+ last = ib_copy(next_text);
+
+ ib_destroy(comment_open);
+ ib_destroy(comment_text);
+ ib_destroy(comment_close);
+ ib_destroy(next_text);
+ }
+
if (last.status == KIT_OK) {
i64 n = res.tags.size;
DA_RESIZE(res.tags, n + 1);
@@ -156,8 +247,6 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_destroy(tag_open);
ib_destroy(tag_name);
- ib_destroy(tag_close);
- ib_destroy(tag_close_empty);
ib_destroy(tag_tail);
}
diff --git a/source/kit/xml.h b/source/kit/xml.h
index c33392b..00dde52 100644
--- a/source/kit/xml.h
+++ b/source/kit/xml.h
@@ -20,6 +20,7 @@ typedef KIT_DA(kit_xml_property_t) kit_da_xml_property_t;
typedef KIT_DA(kit_xml_t) kit_da_xml_t;
struct kit_xml_ {
+ i8 is_declaration;
kit_str_builder_t tag;
kit_str_builder_t text;
kit_str_builder_t tail;
diff --git a/source/tests/xml.test.c b/source/tests/xml.test.c
index 52ff7c0..e477acb 100644
--- a/source/tests/xml.test.c
+++ b/source/tests/xml.test.c
@@ -10,6 +10,7 @@ TEST("xml parse tag") {
REQUIRE_EQ(res.status, KIT_OK);
if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
xml_destroy(&res.xml);
}
@@ -17,6 +18,21 @@ TEST("xml parse tag") {
is_destroy(is);
}
+TEST("xml parse tag with dash") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo-bar> </foo-bar>"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo-bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
TEST("xml parse tag not closed") {
is_handle_t is = IS_WRAP_STRING(SZ("<foo>"));
xml_parse_result_t res = xml_parse(is, NULL);
@@ -36,6 +52,7 @@ TEST("xml parse tag text") {
REQUIRE_EQ(res.status, KIT_OK);
if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
REQUIRE(AR_EQUAL(res.xml.text, SZ(" bar ")));
xml_destroy(&res.xml);
@@ -51,6 +68,7 @@ TEST("xml parse empty tag") {
REQUIRE_EQ(res.status, KIT_OK);
if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
xml_destroy(&res.xml);
}
@@ -143,4 +161,143 @@ TEST("xml parse child") {
is_destroy(is);
}
+TEST("xml parse declaration") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<?foo ?>"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 1);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<!-- foo --><bar />"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment before text") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<!-- foo --> bar <tag />"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.text, SZ(" bar ")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment after text") {
+ is_handle_t is = IS_WRAP_STRING(SZ("foo <!-- bar --><tag />"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.text, SZ("foo ")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment between text") {
+ is_handle_t is = IS_WRAP_STRING(
+ SZ("foo<!-- comment --> bar<tag />"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.text, SZ("foo bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment tail") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo /><!-- tail -->"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment tail before text") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo /><!-- tail --> bar"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment tail after text") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo /> bar <!-- tail -->"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar ")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse comment tail between text") {
+ is_handle_t is = IS_WRAP_STRING(SZ("<tag />foo<!-- tail --> bar"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+
+ if (res.status == KIT_OK) {
+ REQUIRE_EQ(res.xml.is_declaration, 0);
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("tag")));
+ REQUIRE(AR_EQUAL(res.xml.tail, SZ("foo bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
#undef KIT_TEST_FILE