From bbb397d327f84be61f90cb2744c3f29d395857fd Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Fri, 22 Sep 2023 16:50:23 +0200 Subject: xml: comments --- source/kit/xml.c | 149 ++++++++++++++++++++++++++++++++++++--------- source/kit/xml.h | 1 + source/tests/xml.test.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+), 30 deletions(-) (limited to 'source') diff --git a/source/kit/xml.c b/source/kit/xml.c index ed11b18..d815e9b 100644 --- a/source/kit/xml.c +++ b/source/kit/xml.c @@ -21,6 +21,38 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( ib_t tag_text = ib_until(begin, SZ("<")); last = ib_copy(tag_text); + for (;;) { + ib_t comment_open = ib_exact(last, SZ("")); + ib_t comment_close = ib_exact(comment_text, SZ("-->")); + ib_t next_text = ib_until(comment_close, SZ("<")); + + if (next_text.status == KIT_OK && next_text.data.size > 0) { + i64 n = tag_text.data.size; + DA_RESIZE(tag_text.data, n + next_text.data.size); + + if (tag_text.data.size != n + next_text.data.size) + next_text.status = KIT_ERROR_BAD_ALLOC; + else + memcpy(tag_text.data.values + n, next_text.data.values, + next_text.data.size); + } + + ib_destroy(last); + last = ib_copy(next_text); + + ib_destroy(comment_open); + ib_destroy(comment_text); + ib_destroy(comment_close); + ib_destroy(next_text); + } + DA_INIT(res.tags, 0, alloc); for (;;) { @@ -39,7 +71,19 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( xml_t tag; memset(&tag, 0, sizeof tag); - spaces = ib_any(tag_open, SZ(" \t\r\n")); + ib_t decl_open = ib_exact(tag_open, SZ("?")); + + ib_destroy(last); + + if (decl_open.status == KIT_OK) { + tag.is_declaration = 1; + last = ib_copy(decl_open); + } else + last = ib_copy(tag_open); + + ib_destroy(decl_open); + + spaces = ib_any(last, SZ(" \t\r\n")); ib_t tag_name = ib_none(spaces, SZ(" \t\r\n/>")); ib_destroy(spaces); @@ -50,7 +94,7 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( for (;;) { spaces = ib_any(last, SZ(" \t\r\n")); - ib_t property = ib_none(spaces, SZ(" \t\r\n=/>")); + ib_t property = ib_none(spaces, SZ(" \t\r\n=?/>")); ib_destroy(spaces); if (property.status != KIT_OK || property.data.size == 0) { @@ -94,45 +138,92 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( ib_destroy(value_text); } - spaces = ib_any(last, SZ(" \t\r\n")); - ib_t tag_close = ib_exact(spaces, SZ(">")); - ib_t tag_close_empty = ib_exact(spaces, SZ("/>")); - ib_destroy(spaces); - - if (tag_close.status == KIT_OK) { - kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close, - alloc); - tag.text = im.text; - tag.children = im.tags; + spaces = ib_any(last, SZ(" \t\r\n")); - tagend_open = ib_exact(im.last, SZ("")); ib_destroy(spaces); - spaces = ib_any(tagend_name, SZ(" \t\r\n")); - ib_t tagend_close = ib_exact(spaces, SZ(">")); - ib_destroy(spaces); - ib_destroy(tagend_open); - ib_destroy(tagend_name); - - ib_destroy(last); - last = tagend_close; - } else if (tag_close_empty.status == KIT_OK) { ib_destroy(last); - last = ib_copy(tag_close_empty); + last = tag_decl_close; DA_INIT(tag.text, 0, alloc); DA_INIT(tag.children, 0, alloc); - } else - last.status = KIT_ERROR_INTERNAL; + } else { + ib_t tag_close = ib_exact(spaces, SZ(">")); + ib_t tag_close_empty = ib_exact(spaces, SZ("/>")); + ib_destroy(spaces); + + if (tag_close.status == KIT_OK) { + kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close, + alloc); + tag.text = im.text; + tag.children = im.tags; + + tagend_open = ib_exact(im.last, SZ("")); + ib_destroy(spaces); + ib_destroy(tagend_open); + ib_destroy(tagend_name); + + ib_destroy(last); + last = tagend_close; + + } else if (tag_close_empty.status == KIT_OK) { + ib_destroy(last); + last = ib_copy(tag_close_empty); + + DA_INIT(tag.text, 0, alloc); + DA_INIT(tag.children, 0, alloc); + } else + last.status = KIT_ERROR_INTERNAL; + + ib_destroy(tag_close); + ib_destroy(tag_close_empty); + } ib_t tag_tail = ib_until(last, SZ("<")); ib_destroy(last); last = ib_copy(tag_tail); + for (;;) { + ib_t comment_open = ib_exact(last, SZ("")); + ib_t comment_close = ib_exact(comment_text, SZ("-->")); + ib_t next_text = ib_until(comment_close, SZ("<")); + + if (next_text.status == KIT_OK && next_text.data.size > 0) { + i64 n = tag_tail.data.size; + DA_RESIZE(tag_tail.data, n + next_text.data.size); + + if (tag_tail.data.size != n + next_text.data.size) + next_text.status = KIT_ERROR_BAD_ALLOC; + else + memcpy(tag_tail.data.values + n, next_text.data.values, + next_text.data.size); + } + + ib_destroy(last); + last = ib_copy(next_text); + + ib_destroy(comment_open); + ib_destroy(comment_text); + ib_destroy(comment_close); + ib_destroy(next_text); + } + if (last.status == KIT_OK) { i64 n = res.tags.size; DA_RESIZE(res.tags, n + 1); @@ -156,8 +247,6 @@ static kit_xml_intermediate_t kit_xml_parse_buf_( ib_destroy(tag_open); ib_destroy(tag_name); - ib_destroy(tag_close); - ib_destroy(tag_close_empty); ib_destroy(tag_tail); } diff --git a/source/kit/xml.h b/source/kit/xml.h index c33392b..00dde52 100644 --- a/source/kit/xml.h +++ b/source/kit/xml.h @@ -20,6 +20,7 @@ typedef KIT_DA(kit_xml_property_t) kit_da_xml_property_t; typedef KIT_DA(kit_xml_t) kit_da_xml_t; struct kit_xml_ { + i8 is_declaration; kit_str_builder_t tag; kit_str_builder_t text; kit_str_builder_t tail; diff --git a/source/tests/xml.test.c b/source/tests/xml.test.c index 52ff7c0..e477acb 100644 --- a/source/tests/xml.test.c +++ b/source/tests/xml.test.c @@ -10,6 +10,7 @@ TEST("xml parse tag") { REQUIRE_EQ(res.status, KIT_OK); if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); xml_destroy(&res.xml); } @@ -17,6 +18,21 @@ TEST("xml parse tag") { is_destroy(is); } +TEST("xml parse tag with dash") { + is_handle_t is = IS_WRAP_STRING(SZ(" ")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo-bar"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + TEST("xml parse tag not closed") { is_handle_t is = IS_WRAP_STRING(SZ("")); xml_parse_result_t res = xml_parse(is, NULL); @@ -36,6 +52,7 @@ TEST("xml parse tag text") { REQUIRE_EQ(res.status, KIT_OK); if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); REQUIRE(AR_EQUAL(res.xml.text, SZ(" bar "))); xml_destroy(&res.xml); @@ -51,6 +68,7 @@ TEST("xml parse empty tag") { REQUIRE_EQ(res.status, KIT_OK); if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); xml_destroy(&res.xml); } @@ -143,4 +161,143 @@ TEST("xml parse child") { is_destroy(is); } +TEST("xml parse declaration") { + is_handle_t is = IS_WRAP_STRING(SZ("")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 1); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment") { + is_handle_t is = IS_WRAP_STRING(SZ("")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("bar"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment before text") { + is_handle_t is = IS_WRAP_STRING(SZ(" bar ")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.text, SZ(" bar "))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment after text") { + is_handle_t is = IS_WRAP_STRING(SZ("foo ")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.text, SZ("foo "))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment between text") { + is_handle_t is = IS_WRAP_STRING( + SZ("foo bar")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.text, SZ("foo bar"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment tail") { + is_handle_t is = IS_WRAP_STRING(SZ("")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment tail before text") { + is_handle_t is = IS_WRAP_STRING(SZ(" bar")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); + REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment tail after text") { + is_handle_t is = IS_WRAP_STRING(SZ(" bar ")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo"))); + REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar "))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + +TEST("xml parse comment tail between text") { + is_handle_t is = IS_WRAP_STRING(SZ("foo bar")); + xml_parse_result_t res = xml_parse(is, NULL); + + REQUIRE_EQ(res.status, KIT_OK); + + if (res.status == KIT_OK) { + REQUIRE_EQ(res.xml.is_declaration, 0); + REQUIRE(AR_EQUAL(res.xml.tag, SZ("tag"))); + REQUIRE(AR_EQUAL(res.xml.tail, SZ("foo bar"))); + xml_destroy(&res.xml); + } + + is_destroy(is); +} + #undef KIT_TEST_FILE -- cgit v1.2.3