summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mitya Selivanov <automainint@guattari.tech> 2023-09-22 18:34:05 +0200
committer Mitya Selivanov <automainint@guattari.tech> 2023-09-22 18:34:05 +0200
commit a6b362e7bedca7f0dfe9e352ea7a895e4ac7e3c4 (patch)
tree a14b9f2e24c0dafbbc1a1605564f1acddbb38c55
parent bbb397d327f84be61f90cb2744c3f29d395857fd (diff)
download kit-a6b362e7bedca7f0dfe9e352ea7a895e4ac7e3c4.zip
xml escapes; xml full text
-rw-r--r-- source/kit/xml.c 216
-rw-r--r-- source/kit/xml.h 13
-rw-r--r-- source/tests/xml.test.c 106
3 files changed, 272 insertions, 63 deletions
diff --git a/source/kit/xml.c b/source/kit/xml.c
index d815e9b..1c8e9f6 100644
--- a/source/kit/xml.c
+++ b/source/kit/xml.c
@@ -9,17 +9,49 @@ typedef struct {
kit_da_xml_t tags;
} kit_xml_intermediate_t;
-static kit_xml_intermediate_t kit_xml_parse_buf_(
- ib_t begin, kit_allocator_t *alloc) {
- kit_xml_intermediate_t res;
- memset(&res, 0, sizeof res);
+// Replace the five predefined XML entities (&lt; &gt; &amp; &quot;
+// &apos;) in `str` with the characters they stand for.
+//
+// On success the previous contents of `str` are destroyed and replaced
+// by the decoded buffer, which is created with the same allocator. On
+// failure `str` is left untouched.
+//
+// Returns KIT_OK, KIT_ERROR_BAD_ALLOC when the scratch buffer could
+// not be allocated, or KIT_ERROR_INTERNAL for an '&' that has no
+// closing ';' or that names an unknown entity.
+static kit_status_t kit_xml_unescape_(str_builder_t *str) {
+  assert(str != NULL);
+
+  // Decoding never makes the text longer, so a buffer of the source
+  // size is always large enough.
+  str_builder_t buf;
+  DA_INIT(buf, str->size, str->alloc);
+
+  // NOTE(review): assumes DA_INIT signals a failed allocation by
+  // leaving the size short of the request, the same way DA_RESIZE is
+  // checked elsewhere in this file - confirm. Without this check the
+  // loop below would write through a NULL `buf.values`.
+  if (buf.size != str->size) {
+    DA_DESTROY(buf);
+    return KIT_ERROR_BAD_ALLOC;
+  }
+
+  // From here on `buf.size` counts the bytes written so far.
+  buf.size = 0;
+
+  for (i64 i = 0; i < str->size; i++) {
+    if (str->values[i] != '&') {
+      buf.values[buf.size++] = str->values[i];
+      continue;
+    }
+
+    // `n` becomes the offset of the terminating ';' from the '&'.
+    i64 n = 1;
+    while (i + n < str->size && str->values[i + n] != ';') n++;
+    if (i + n >= str->size) {
+      DA_DESTROY(buf);
+      return KIT_ERROR_INTERNAL;
+    }
+
+    // The length test guarantees memcmp stays inside `str`.
+    if (n == 3 && memcmp(str->values + i, "&lt;", 4) == 0)
+      buf.values[buf.size++] = '<';
+    else if (n == 3 && memcmp(str->values + i, "&gt;", 4) == 0)
+      buf.values[buf.size++] = '>';
+    else if (n == 4 && memcmp(str->values + i, "&amp;", 5) == 0)
+      buf.values[buf.size++] = '&';
+    else if (n == 5 && memcmp(str->values + i, "&quot;", 6) == 0)
+      buf.values[buf.size++] = '"';
+    else if (n == 5 && memcmp(str->values + i, "&apos;", 6) == 0)
+      buf.values[buf.size++] = '\'';
+    else {
+      DA_DESTROY(buf);
+      return KIT_ERROR_INTERNAL;
+    }
+
+    // Land on the ';'; the loop increment then steps past it.
+    i += n;
+  }
- ib_t last, spaces;
- memset(&last, 0, sizeof last);
- memset(&spaces, 0, sizeof spaces);
+  // Hand the decoded buffer over to the caller's builder.
+  DA_DESTROY(*str);
+  *str = buf;
- ib_t tag_text = ib_until(begin, SZ("<"));
- last = ib_copy(tag_text);
+  return KIT_OK;
+}
+
+static ib_t kit_xml_parse_text_(ib_t begin) {
+ ib_t text = ib_until(begin, SZ("<"));
+ ib_t last = ib_copy(text);
for (;;) {
ib_t comment_open = ib_exact(last, SZ("<!--"));
@@ -34,13 +66,13 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_t next_text = ib_until(comment_close, SZ("<"));
if (next_text.status == KIT_OK && next_text.data.size > 0) {
- i64 n = tag_text.data.size;
- DA_RESIZE(tag_text.data, n + next_text.data.size);
+ i64 n = text.data.size;
+ DA_RESIZE(text.data, n + next_text.data.size);
- if (tag_text.data.size != n + next_text.data.size)
+ if (text.data.size != n + next_text.data.size)
next_text.status = KIT_ERROR_BAD_ALLOC;
else
- memcpy(tag_text.data.values + n, next_text.data.values,
+ memcpy(text.data.values + n, next_text.data.values,
next_text.data.size);
}
@@ -53,6 +85,58 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_destroy(next_text);
}
+ // move
+ DA_DESTROY(last.data);
+ last.data = text.data;
+ memset(&text.data, 0, sizeof text.data);
+
+ kit_status_t s = kit_xml_unescape_(&last.data);
+ if (s != KIT_OK)
+ last.status = s;
+
+ ib_destroy(text);
+
+ return last;
+}
+
+// Parse a quoted string (used for attribute values) starting at
+// `begin`. The opening delimiter may be a double quote or an
+// apostrophe; the same delimiter is then searched for as the
+// terminator, and entity escapes in the enclosed text are decoded.
+//
+// Returns the buffer state after the closing delimiter, with its
+// `data` replaced by the unescaped text. `status` carries any failure
+// from matching the delimiters or from unescaping. The caller owns the
+// result and releases it with ib_destroy.
+static ib_t kit_xml_parse_string_(ib_t begin) {
+  ib_t quotes_open = ib_exact(begin, SZ("\""));
+  ib_t apostr_open = ib_exact(begin, SZ("'"));
+
+  // `open` is only an alias of one of the two attempts above; it is
+  // never destroyed on its own.
+  ib_t open = quotes_open.status == KIT_OK ? quotes_open
+                                           : apostr_open;
+
+  ib_t text  = ib_until(open, WRAP_STR(open.data));
+  ib_t close = ib_exact(text, WRAP_STR(open.data));
+
+  // move
+  DA_DESTROY(close.data);
+  close.data = text.data;
+  memset(&text.data, 0, sizeof text.data);
+
+  // Only a failed unescape may change the status. Assigning on success
+  // would overwrite an earlier parse error (for example a missing
+  // closing quote) with KIT_OK and drop unescape failures.
+  kit_status_t s = kit_xml_unescape_(&close.data);
+  if (s != KIT_OK)
+    close.status = s;
+
+  ib_destroy(quotes_open);
+  ib_destroy(apostr_open);
+  ib_destroy(text);
+
+  return close;
+}
+
+static kit_xml_intermediate_t kit_xml_parse_buf_(
+ ib_t begin, kit_allocator_t *alloc) {
+ kit_xml_intermediate_t res;
+ memset(&res, 0, sizeof res);
+
+ ib_t last, spaces;
+ memset(&last, 0, sizeof last);
+ memset(&spaces, 0, sizeof spaces);
+
+ ib_t tag_text = kit_xml_parse_text_(begin);
+ last = ib_copy(tag_text);
+
DA_INIT(res.tags, 0, alloc);
for (;;) {
@@ -105,14 +189,12 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
spaces = ib_any(property, SZ(" \t\r\n"));
ib_t equals = ib_exact(spaces, SZ("="));
ib_destroy(spaces);
- spaces = ib_any(equals, SZ(" \t\r\n"));
- ib_t value_open = ib_exact(spaces, SZ("\""));
+ spaces = ib_any(equals, SZ(" \t\r\n"));
+ ib_t value = kit_xml_parse_string_(spaces);
ib_destroy(spaces);
- ib_t value_text = ib_until(value_open, SZ("\""));
- ib_t value_close = ib_exact(value_text, SZ("\""));
ib_destroy(last);
- last = value_close;
+ last = ib_copy(value);
if (last.status == KIT_OK) {
i64 n = tag.properties.size;
@@ -127,15 +209,14 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
memset(&property.data, 0, sizeof property.data);
// move
- tag.properties.values[n].value = value_text.data;
- memset(&value_text.data, 0, sizeof value_text.data);
+ tag.properties.values[n].value = value.data;
+ memset(&value.data, 0, sizeof value.data);
}
}
ib_destroy(property);
ib_destroy(equals);
- ib_destroy(value_open);
- ib_destroy(value_text);
+ ib_destroy(value);
}
spaces = ib_any(last, SZ(" \t\r\n"));
@@ -187,43 +268,11 @@ static kit_xml_intermediate_t kit_xml_parse_buf_(
ib_destroy(tag_close_empty);
}
- ib_t tag_tail = ib_until(last, SZ("<"));
+ ib_t tag_tail = kit_xml_parse_text_(last);
ib_destroy(last);
last = ib_copy(tag_tail);
- for (;;) {
- ib_t comment_open = ib_exact(last, SZ("<!--"));
-
- if (comment_open.status != KIT_OK) {
- ib_destroy(comment_open);
- break;
- }
-
- ib_t comment_text = ib_until(comment_open, SZ("-->"));
- ib_t comment_close = ib_exact(comment_text, SZ("-->"));
- ib_t next_text = ib_until(comment_close, SZ("<"));
-
- if (next_text.status == KIT_OK && next_text.data.size > 0) {
- i64 n = tag_tail.data.size;
- DA_RESIZE(tag_tail.data, n + next_text.data.size);
-
- if (tag_tail.data.size != n + next_text.data.size)
- next_text.status = KIT_ERROR_BAD_ALLOC;
- else
- memcpy(tag_tail.data.values + n, next_text.data.values,
- next_text.data.size);
- }
-
- ib_destroy(last);
- last = ib_copy(next_text);
-
- ib_destroy(comment_open);
- ib_destroy(comment_text);
- ib_destroy(comment_close);
- ib_destroy(next_text);
- }
-
if (last.status == KIT_OK) {
i64 n = res.tags.size;
DA_RESIZE(res.tags, n + 1);
@@ -299,17 +348,68 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
return res;
}
-kit_xml_print_result_t kit_xml_print(kit_xml_t *xml,
- kit_allocator_t *alloc) {
+// Placeholder: the body does no work yet and always returns a zeroed
+// result whose status is KIT_ERROR_NOT_IMPLEMENTED. `alloc` is
+// currently unused.
+kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
assert(xml != NULL);
- kit_xml_print_result_t result;
+  // Spelled with the full type name to match the declared return type
+  // and kit_xml_full_text.
+  kit_xml_text_t result;
memset(&result, 0, sizeof result);
result.status = KIT_ERROR_NOT_IMPLEMENTED;
return result;
}
+// Append the text of `xml` and, recursively, of all its descendants
+// to `buf`: first the node's own text, then for every child the
+// child's full text followed by that child's tail.
+//
+// Returns KIT_OK, or KIT_ERROR_BAD_ALLOC when `buf` could not be
+// grown; in that case `buf` may hold a partial result.
+static kit_status_t kit_xml_append_text_(str_builder_t *buf,
+                                         xml_t         *xml) {
+  assert(buf != NULL);
+  assert(xml != NULL);
+
+  // Skip empty text, as is already done for empty tails below. With a
+  // still-empty `buf` both pointers may be NULL, and calling memcpy
+  // with NULL is undefined even for a zero length.
+  if (xml->text.size > 0) {
+    i64 n = buf->size;
+    DA_RESIZE(*buf, n + xml->text.size);
+
+    assert(buf->size == n + xml->text.size);
+    if (buf->size != n + xml->text.size)
+      return KIT_ERROR_BAD_ALLOC;
+
+    memcpy(buf->values + n, xml->text.values, xml->text.size);
+  }
+
+  for (i64 i = 0; i < xml->children.size; i++) {
+    kit_status_t s = kit_xml_append_text_(buf,
+                                          xml->children.values + i);
+    if (s != KIT_OK)
+      return s;
+
+    str_t tail = WRAP_STR(xml->children.values[i].tail);
+
+    if (tail.size <= 0)
+      continue;
+
+    i64 n = buf->size;
+    DA_RESIZE(*buf, n + tail.size);
+
+    assert(buf->size == n + tail.size);
+    if (buf->size != n + tail.size)
+      return KIT_ERROR_BAD_ALLOC;
+
+    memcpy(buf->values + n, tail.values, tail.size);
+  }
+
+  return KIT_OK;
+}
+
+// Gather the text of `xml` and of everything nested inside it into a
+// single string created with `alloc`.
+//
+// `text` is initialized on every path. `status` is
+// KIT_ERROR_INVALID_ARGUMENT when `xml` is NULL, otherwise whatever
+// kit_xml_append_text_ reports.
+kit_xml_text_t kit_xml_full_text(kit_xml_t       *xml,
+                                 kit_allocator_t *alloc) {
+  kit_xml_text_t out;
+  out.status = KIT_OK;
+  DA_INIT(out.text, 0, alloc);
+
+  out.status = xml == NULL
+                   ? KIT_ERROR_INVALID_ARGUMENT
+                   : kit_xml_append_text_(&out.text, xml);
+
+  return out;
+}
+
void kit_xml_destroy(kit_xml_t *xml) {
assert(xml != NULL);
if (xml == NULL)
diff --git a/source/kit/xml.h b/source/kit/xml.h
index 00dde52..6e04878 100644
--- a/source/kit/xml.h
+++ b/source/kit/xml.h
@@ -35,14 +35,15 @@ typedef struct {
typedef struct {
kit_status_t status;
- kit_str_builder_t xml;
-} kit_xml_print_result_t;
+ kit_str_builder_t text;
+} kit_xml_text_t;
kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
kit_allocator_t *alloc);
-kit_xml_print_result_t kit_xml_print(kit_xml_t *xml,
- kit_allocator_t *alloc);
-void kit_xml_destroy(kit_xml_t *xml);
+kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc);
+kit_xml_text_t kit_xml_full_text(kit_xml_t *xml,
+ kit_allocator_t *alloc);
+void kit_xml_destroy(kit_xml_t *xml);
#ifdef __cplusplus
}
@@ -51,9 +52,11 @@ void kit_xml_destroy(kit_xml_t *xml);
#ifndef KIT_DISABLE_SHORT_NAMES
# define xml_parse kit_xml_parse
# define xml_print kit_xml_print
+# define xml_full_text kit_xml_full_text
# define xml_destroy kit_xml_destroy
# define xml_t kit_xml_t
# define xml_parse_result_t kit_xml_parse_result_t
+# define xml_text_t kit_xml_text_t
#endif
#endif
diff --git a/source/tests/xml.test.c b/source/tests/xml.test.c
index e477acb..2a0b114 100644
--- a/source/tests/xml.test.c
+++ b/source/tests/xml.test.c
@@ -161,6 +161,26 @@ TEST("xml parse child") {
is_destroy(is);
}
+// Content before a child element is the parent's text; content after
+// the child, up to the parent's end, is stored as the child's tail.
+TEST("xml parse child with text and tail") {
+  is_handle_t input = IS_WRAP_STRING(
+      SZ("<foo>text<bar /> tail</foo>"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_OK);
+
+  if (parsed.status == KIT_OK) {
+    xml_t *root = &parsed.xml;
+
+    REQUIRE(AR_EQUAL(root->tag, SZ("foo")));
+    REQUIRE(AR_EQUAL(root->text, SZ("text")));
+    REQUIRE_EQ(root->children.size, 1);
+    if (root->children.size == 1) {
+      xml_t *child = root->children.values;
+      REQUIRE(AR_EQUAL(child->tag, SZ("bar")));
+      REQUIRE(AR_EQUAL(child->tail, SZ(" tail")));
+    }
+    xml_destroy(root);
+  }
+
+  is_destroy(input);
+}
+
TEST("xml parse declaration") {
is_handle_t is = IS_WRAP_STRING(SZ("<?foo ?>"));
xml_parse_result_t res = xml_parse(is, NULL);
@@ -300,4 +320,90 @@ TEST("xml parse comment tail between text") {
is_destroy(is);
}
+// &lt; and &gt; in plain text must come back as literal angle
+// brackets rather than being treated as markup.
+TEST("xml parse escaped text") {
+  is_handle_t        input  = IS_WRAP_STRING(SZ("&lt;foo&gt;"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_OK);
+
+  if (parsed.status == KIT_OK) {
+    xml_t *root = &parsed.xml;
+
+    REQUIRE_EQ(root->is_declaration, 0);
+    REQUIRE(AR_EQUAL(root->text, SZ("<foo>")));
+    xml_destroy(root);
+  }
+
+  is_destroy(input);
+}
+
+// A double-quoted attribute value may contain &amp; and &quot;, which
+// decode to '&' and '"'.
+TEST("xml parse escaped quote property") {
+  is_handle_t input = IS_WRAP_STRING(
+      SZ("<foo bar=\"&amp;&quot;\" />"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_OK);
+
+  if (parsed.status == KIT_OK) {
+    xml_t *root = &parsed.xml;
+
+    REQUIRE_EQ(root->is_declaration, 0);
+    REQUIRE(AR_EQUAL(root->tag, SZ("foo")));
+    REQUIRE_EQ(root->properties.size, 1);
+    if (root->properties.size == 1) {
+      REQUIRE(AR_EQUAL(root->properties.values[0].name, SZ("bar")));
+      REQUIRE(
+          AR_EQUAL(root->properties.values[0].value, SZ("&\"")));
+    }
+    xml_destroy(root);
+  }
+
+  is_destroy(input);
+}
+
+// Attribute values may also be delimited by apostrophes; &amp; and
+// &apos; inside decode to '&' and '\''.
+TEST("xml parse escaped apostrophe property") {
+  is_handle_t input = IS_WRAP_STRING(
+      SZ("<foo bar='&amp;&apos;' />"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_OK);
+
+  if (parsed.status == KIT_OK) {
+    xml_t *root = &parsed.xml;
+
+    REQUIRE_EQ(root->is_declaration, 0);
+    REQUIRE(AR_EQUAL(root->tag, SZ("foo")));
+    REQUIRE_EQ(root->properties.size, 1);
+    if (root->properties.size == 1) {
+      REQUIRE(AR_EQUAL(root->properties.values[0].name, SZ("bar")));
+      REQUIRE(AR_EQUAL(root->properties.values[0].value, SZ("&'")));
+    }
+    xml_destroy(root);
+  }
+
+  is_destroy(input);
+}
+
+// An entity name outside the supported set makes the parse fail with
+// KIT_ERROR_INTERNAL.
+TEST("xml parse invalid escape") {
+  is_handle_t        input  = IS_WRAP_STRING(SZ("&foobar;"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_ERROR_INTERNAL);
+
+  // Release the tree only if the parser unexpectedly accepted the
+  // input.
+  if (parsed.status == KIT_OK) {
+    xml_destroy(&parsed.xml);
+  }
+
+  is_destroy(input);
+}
+
+// xml_full_text joins the root's text, each child's text and each
+// child's tail in document order, dropping the markup.
+TEST("xml full text") {
+  is_handle_t input = IS_WRAP_STRING(
+      SZ("<tag>foo <a>text</a> bar <b>text</b> tail</tag>"));
+  xml_parse_result_t parsed = xml_parse(input, NULL);
+
+  REQUIRE_EQ(parsed.status, KIT_OK);
+
+  if (parsed.status == KIT_OK) {
+    xml_text_t joined = xml_full_text(&parsed.xml, NULL);
+
+    REQUIRE_EQ(joined.status, KIT_OK);
+    REQUIRE(AR_EQUAL(joined.text, SZ("foo text bar text tail")));
+
+    DA_DESTROY(joined.text);
+    xml_destroy(&parsed.xml);
+  }
+
+  is_destroy(input);
+}
+
#undef KIT_TEST_FILE