summary refs log tree commit diff
diff options
context:
space:
mode:
author Mitya Selivanov <automainint@guattari.tech> 2023-09-19 05:34:00 +0200
committer Mitya Selivanov <automainint@guattari.tech> 2023-09-19 05:34:00 +0200
commit 18c419ffb4e750c3c9ea8570cf18f3099267b1bb (patch)
tree a1d936834a54934cb4407f870d2ac1dbaa87cd3b
parent 8499b75fb011d72eeb5acbd85bcf41e4ee51e9a7 (diff)
download kit-18c419ffb4e750c3c9ea8570cf18f3099267b1bb.zip
Update xml parsing
-rw-r--r-- README 2
-rw-r--r-- TODO 9
-rw-r--r-- source/kit/input_buffer.c 134
-rw-r--r-- source/kit/input_buffer.h 22
-rw-r--r-- source/kit/xml.c 230
-rw-r--r-- source/tests/input_buffer.test.c 8
-rw-r--r-- source/tests/xml.test.c 85
7 files changed, 425 insertions, 65 deletions
diff --git a/README b/README
index cfb1851..56185ae 100644
--- a/README
+++ b/README
@@ -19,7 +19,7 @@ Features
- Mutual exclusion
- Threads
- Filesystem
- - File and memory mapping
+ - File mapping and shared memory
- Sockets wrapper
- Misc
- Input buffer
diff --git a/TODO b/TODO
index 5bcfa82..bdaf041 100644
--- a/TODO
+++ b/TODO
@@ -16,15 +16,8 @@ To-Do
- Better atomics support
- System call, run process
- Terminal
- - Inter-process transfer
- Sockets tests
- - System boilerplate (graphics, audio, input)
- - SDL
- - X11 (Linux)
- - Win32 (Windows)
- - Cocoa (macOS)
- - Emscripten (WebAssembly)
- - Parsing OpenGL interface
+ - Parsing OpenGL interface
- Utils
- parse, print
- String builder
diff --git a/source/kit/input_buffer.c b/source/kit/input_buffer.c
index 5081cc4..3ee4959 100644
--- a/source/kit/input_buffer.c
+++ b/source/kit/input_buffer.c
@@ -87,6 +87,23 @@ kit_ib_t kit_ib_wrap(kit_is_handle_t upstream,
return buf;
}
+kit_ib_t kit_ib_copy(kit_ib_t buf) { // Return a second handle at the same offset of the same internal buffer.
+ kit_ib_t next;
+ memset(&next, 0, sizeof next);
+ // A source handle with a non-OK status yields a zeroed handle that carries only that status.
+ if (buf.status != KIT_OK) {
+ next.status = buf.status;
+ return next;
+ }
+ // kit_buf_acquire_ is called once for the new handle (presumably a reference count - confirm).
+ kit_buf_acquire_(buf.internal);
+ // Only offset and internal are carried over; next.data stays zeroed, so the copy holds no bytes.
+ next.offset = buf.offset;
+ next.internal = buf.internal;
+
+ return next;
+}
+
kit_ib_t kit_ib_read(kit_ib_t buf, i64 size) {
kit_ib_t next;
memset(&next, 0, sizeof next);
@@ -116,6 +133,118 @@ kit_ib_t kit_ib_read(kit_ib_t buf, i64 size) {
return next;
}
+kit_ib_t kit_ib_any(kit_ib_t buf, kit_str_t data) { // Read the longest run of bytes at buf.offset that all occur in data.
+ kit_ib_t next;
+ memset(&next, 0, sizeof next);
+ // A source handle with a non-OK status is not read; only its status is passed on.
+ if (buf.status != KIT_OK) {
+ next.status = buf.status;
+ return next;
+ }
+ // NOTE(review): the BAD_ALLOC return inside the loop leaves next.internal zeroed after this acquire - confirm the buffer is still released.
+ kit_buf_acquire_(buf.internal);
+ // next.data starts empty and uses the allocator reported by the internal buffer.
+ DA_INIT(next.data, 0, kit_buf_alloc_(buf.internal));
+
+ i64 size = 0;
+ // size counts the bytes accepted so far; each pass examines one more byte.
+ for (;; ++size) {
+ kit_buf_adjust_(buf.internal, buf.offset + size + 1);
+ // Grow next.data by one slot so the candidate byte is read straight into it.
+ DA_RESIZE(next.data, size + 1);
+
+ assert(next.data.size == size + 1); // with assertions enabled a failed resize aborts here; otherwise it is reported below
+ if (next.data.size != size + 1) {
+ next.status = KIT_ERROR_BAD_ALLOC;
+ return next;
+ }
+
+ kit_str_t destination = { .size = 1,
+ .values = next.data.values + size };
+ i64 n = kit_buf_read_(buf.internal, buf.offset + size,
+ destination);
+ // Stop when kit_buf_read_ does not deliver exactly one byte.
+ if (n != 1)
+ break;
+
+ i8 found = 0;
+ // Linear membership test of the candidate byte against data.
+ for (i64 i = 0; i < data.size; i++)
+ if (data.values[i] == destination.values[0]) {
+ found = 1;
+ break;
+ }
+ // The first byte that is not in data ends the run and is not consumed.
+ if (!found)
+ break;
+ }
+
+ next.offset = buf.offset + size;
+ next.internal = buf.internal;
+ // Drop the extra slot that held the rejected (or unread) byte.
+ DA_RESIZE(next.data, size);
+ if (next.data.size != size)
+ next.status = KIT_ERROR_BAD_ALLOC;
+
+ return next;
+}
+
+kit_ib_t kit_ib_none(kit_ib_t buf, kit_str_t data) { // Read the longest run of bytes at buf.offset none of which occur in data.
+ kit_ib_t next;
+ memset(&next, 0, sizeof next);
+ // A source handle with a non-OK status is not read; only its status is passed on.
+ if (buf.status != KIT_OK) {
+ next.status = buf.status;
+ return next;
+ }
+ // NOTE(review): the BAD_ALLOC return inside the loop leaves next.internal zeroed after this acquire - confirm the buffer is still released.
+ kit_buf_acquire_(buf.internal);
+ // next.data starts empty and uses the allocator reported by the internal buffer.
+ DA_INIT(next.data, 0, kit_buf_alloc_(buf.internal));
+
+ i64 size = 0;
+ // size counts the bytes accepted so far; each pass examines one more byte.
+ for (;; ++size) {
+ kit_buf_adjust_(buf.internal, buf.offset + size + 1);
+ // Grow next.data by one slot so the candidate byte is read straight into it.
+ DA_RESIZE(next.data, size + 1);
+
+ assert(next.data.size == size + 1); // with assertions enabled a failed resize aborts here; otherwise it is reported below
+ if (next.data.size != size + 1) {
+ next.status = KIT_ERROR_BAD_ALLOC;
+ return next;
+ }
+
+ kit_str_t destination = { .size = 1,
+ .values = next.data.values + size };
+ i64 n = kit_buf_read_(buf.internal, buf.offset + size,
+ destination);
+ // Stop when kit_buf_read_ does not deliver exactly one byte.
+ if (n != 1)
+ break;
+
+ i8 found = 0;
+ // Linear membership test of the candidate byte against data.
+ for (i64 i = 0; i < data.size; i++)
+ if (data.values[i] == destination.values[0]) {
+ found = 1;
+ break;
+ }
+ // The first byte that is in data ends the run and is not consumed.
+ if (found)
+ break;
+ }
+
+ next.offset = buf.offset + size;
+ next.internal = buf.internal;
+ // Drop the extra slot that held the terminating (or unread) byte.
+ DA_RESIZE(next.data, size);
+ if (next.data.size != size)
+ next.status = KIT_ERROR_BAD_ALLOC;
+
+ return next;
+}
+
kit_ib_t kit_ib_exact(kit_ib_t buf, kit_str_t data) {
kit_ib_t res = kit_ib_read(buf, data.size);
if (!AR_EQUAL(res.data, data))
@@ -177,7 +306,8 @@ kit_ib_t kit_ib_until(kit_ib_t buf, kit_str_t data) {
}
kit_ib_t kit_ib_while(kit_ib_t buf,
- kit_ib_read_condition_fn condition) {
+ kit_ib_read_condition_fn condition,
+ void *context) {
kit_ib_t next;
memset(&next, 0, sizeof next);
@@ -209,7 +339,7 @@ kit_ib_t kit_ib_while(kit_ib_t buf,
destination);
kit_str_t data = { .size = size + 1, .values = next.data.values };
- if (n != 1 || condition == NULL || condition(data) == 0)
+ if (n != 1 || condition == NULL || condition(data, context) == 0)
break;
}
diff --git a/source/kit/input_buffer.h b/source/kit/input_buffer.h
index e34b512..a40ec99 100644
--- a/source/kit/input_buffer.h
+++ b/source/kit/input_buffer.h
@@ -16,24 +16,38 @@ typedef struct {
kit_str_builder_t data;
} kit_ib_t;
-typedef i8 (*kit_ib_read_condition_fn)(kit_str_t data);
+typedef i8 (*kit_ib_read_condition_fn)(kit_str_t data, void *context);
kit_ib_t kit_ib_wrap(kit_is_handle_t upstream,
kit_allocator_t *alloc);
+kit_ib_t kit_ib_copy(kit_ib_t buf);
+
kit_ib_t kit_ib_read(kit_ib_t buf, i64 size);
+kit_ib_t kit_ib_any(kit_ib_t buf, kit_str_t data);
+
+kit_ib_t kit_ib_none(kit_ib_t buf, kit_str_t data);
+
kit_ib_t kit_ib_exact(kit_ib_t buf, kit_str_t data);
kit_ib_t kit_ib_until(kit_ib_t buf, kit_str_t data);
kit_ib_t kit_ib_while(kit_ib_t buf,
- kit_ib_read_condition_fn condition);
+ kit_ib_read_condition_fn condition,
+ void *context);
void kit_ib_destroy(kit_ib_t buf);
#define KIT_IB_WRAP(upstream) kit_ib_wrap(upstream, NULL)
+#define KIT_IB_SKIP(buf_, proc_, ...) /* Advance buf_ in place: buf_ = proc_(buf_, ...), then destroy the previous handle. */ \
+ do { \
+ kit_ib_t temp_buf_ = (buf_); /* keep the old handle so it can be destroyed after the call */ \
+ (buf_) = proc_((buf_), __VA_ARGS__); /* buf_ is expanded again here, so pass an lvalue without side effects; at least one argument must follow proc_ */ \
+ kit_ib_destroy((temp_buf_)); \
+ } while (0)
+
#ifdef __cplusplus
}
#endif
@@ -42,13 +56,17 @@ void kit_ib_destroy(kit_ib_t buf);
# define ib_t kit_ib_t
# define ib_read_condition_fn kit_ib_read_condition_fn
# define ib_wrap kit_ib_wrap
+# define ib_copy kit_ib_copy
# define ib_read kit_ib_read
+# define ib_any kit_ib_any
+# define ib_none kit_ib_none
# define ib_exact kit_ib_exact
# define ib_until kit_ib_until
# define ib_while kit_ib_while
# define ib_destroy kit_ib_destroy
# define IB_WRAP KIT_IB_WRAP
+# define IB_SKIP KIT_IB_SKIP
#endif
#endif
diff --git a/source/kit/xml.c b/source/kit/xml.c
index bb16192..2bef6d5 100644
--- a/source/kit/xml.c
+++ b/source/kit/xml.c
@@ -3,73 +3,197 @@
#include "input_buffer.h"
#include <assert.h>
-kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
- kit_allocator_t *alloc) {
- xml_parse_result_t res;
+typedef struct { // Result of the internal parsing helper: the parsed element plus the buffer handle it stopped at.
+ ib_t last; // handle returned with the result; kit_xml_parse reads its status and then destroys it
+ kit_xml_t xml; // the parsed element
+} kit_xml_intermediate_t;
+
+static kit_xml_intermediate_t kit_xml_parse_buf_(
+ ib_t begin, kit_allocator_t *alloc) {
+ kit_xml_intermediate_t res;
memset(&res, 0, sizeof res);
- ib_t begin = ib_wrap(is, alloc);
- ib_t tag_before = ib_until(begin, SZ("<"));
- ib_t tag_open = ib_exact(tag_before, SZ("<"));
- ib_t tag_name = ib_until(tag_open, SZ(">"));
- ib_t tag_name_empty = ib_until(tag_open, SZ("/"));
-
-#define return_ \
- ib_destroy(begin); \
- ib_destroy(tag_before); \
- ib_destroy(tag_open); \
- ib_destroy(tag_name); \
- ib_destroy(tag_name_empty); \
- ib_destroy(tag_close); \
+ ib_t last, spaces;
+ memset(&last, 0, sizeof last);
+ memset(&spaces, 0, sizeof spaces);
+
+ ib_t tag_open = ib_exact(begin, SZ("<"));
+ ib_t tag_name = ib_none(tag_open, SZ(" \t\r\n/>"));
+ last = ib_copy(tag_name);
+
+ DA_INIT(res.xml.properties, 0, alloc);
+
+ for (;;) {
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t property = ib_none(spaces, SZ(" \t\r\n=/>"));
+ ib_destroy(spaces);
+
+ if (property.status != KIT_OK || property.data.size == 0) {
+ ib_destroy(property);
+ break;
+ }
+
+ spaces = ib_any(property, SZ(" \t\r\n"));
+ ib_t equals = ib_exact(spaces, SZ("="));
+ ib_destroy(spaces);
+ spaces = ib_any(equals, SZ(" \t\r\n"));
+ ib_t value_open = ib_exact(spaces, SZ("\""));
+ ib_t value_text = ib_until(value_open, SZ("\""));
+ ib_t value_close = ib_exact(value_text, SZ("\""));
+
+ if (value_close.status == KIT_OK) {
+ i64 n = res.xml.properties.size;
+ DA_RESIZE(res.xml.properties, n + 1);
+
+ if (res.xml.properties.size != n + 1) {
+ res.last = value_close;
+ res.last.status = KIT_ERROR_BAD_ALLOC;
+
+ ib_destroy(begin);
+ ib_destroy(tag_open);
+ ib_destroy(tag_name);
+ ib_destroy(last);
+ ib_destroy(spaces);
+ ib_destroy(property);
+ ib_destroy(equals);
+ ib_destroy(value_open);
+ ib_destroy(value_text);
+
+ DA_DESTROY(res.xml.properties);
+ return res;
+ }
+
+ // move
+ res.xml.properties.values[n].name = property.data;
+ memset(&property.data, 0, sizeof property.data);
+
+ // move
+ res.xml.properties.values[n].value = value_text.data;
+ memset(&value_text.data, 0, sizeof value_text.data);
+ }
+
+ ib_destroy(last);
+ last = ib_copy(value_close);
+
+ ib_destroy(spaces);
+ ib_destroy(property);
+ ib_destroy(equals);
+ ib_destroy(value_open);
+ ib_destroy(value_text);
+ ib_destroy(value_close);
+
+ if (value_close.status != KIT_OK)
+ break;
+ }
+
+ spaces = ib_any(last, SZ(" \t\r\n"));
+ ib_t tag_close = ib_exact(spaces, SZ(">"));
+ ib_t tag_close_empty = ib_exact(spaces, SZ("/>"));
+
+ ib_destroy(last);
+ ib_destroy(spaces);
+
+#define return_ \
+ ib_destroy(begin); \
+ ib_destroy(tag_open); \
+ ib_destroy(tag_name); \
+ ib_destroy(tag_close); \
+ ib_destroy(tag_close_empty); \
return
- if (tag_name_empty.offset < tag_name.offset) {
- ib_t tag_close = ib_exact(tag_name_empty, SZ("/>"));
+ if (tag_close_empty.status == KIT_OK) {
+ ib_t tag_tail = ib_until(tag_close_empty, SZ("<"));
+
+ if (tag_tail.status != KIT_OK) {
+ res.last = tag_tail;
- if (tag_close.status != KIT_OK) {
- res.status = KIT_ERROR_INTERNAL;
+ DA_DESTROY(res.xml.properties);
return_ res;
}
// move
- res.xml.tag = tag_name_empty.data;
- memset(&tag_name_empty.data, 0, sizeof tag_name_empty.data);
+ res.xml.tag = tag_name.data;
+ memset(&tag_name.data, 0, sizeof tag_name.data);
- while (res.xml.tag.size > 0 &&
- res.xml.tag.values[res.xml.tag.size - 1] == ' ')
- --res.xml.tag.size;
+ // move
+ res.xml.tail = tag_tail.data;
+ memset(&tag_tail.data, 0, sizeof tag_tail.data);
DA_INIT(res.xml.text, 0, alloc);
- DA_INIT(res.xml.tail, 0, alloc);
- DA_INIT(res.xml.properties, 0, alloc);
DA_INIT(res.xml.children, 0, alloc);
- res.status = KIT_OK;
+ res.last = tag_tail;
return_ res;
}
- ib_t tag_close = ib_exact(tag_name, SZ(">"));
- ib_t tag_text = ib_until(tag_close, SZ("<"));
- ib_t tagend_open = ib_exact(tag_text, SZ("</"));
+ ib_t tag_text = ib_until(tag_close, SZ("<"));
+ last = ib_copy(tag_text);
+
+ DA_INIT(res.xml.children, 0, alloc);
+
+ if (last.status == KIT_OK)
+ for (;;) {
+ // TODO
+ // Refactor: make this outer loop.
+ //
+
+ ib_t next = ib_exact(last, SZ("</"));
+
+ if (next.status == KIT_OK) {
+ ib_destroy(last);
+ last = next;
+ break;
+ }
+
+ ib_destroy(next);
+
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(last, alloc);
+
+ last = im.last;
+
+ if (last.status != KIT_OK)
+ break;
+
+ i64 n = res.xml.children.size;
+ DA_RESIZE(res.xml.children, n + 1);
+
+ if (res.xml.children.size != n + 1) {
+ res.last = last;
+ res.last.status = KIT_ERROR_BAD_ALLOC;
+
+ ib_destroy(tag_text);
+
+ DA_DESTROY(res.xml.properties);
+ DA_DESTROY(res.xml.children);
+ return_ res;
+ }
+
+ res.xml.children.values[n] = im.xml;
+ }
+
+ ib_t tagend_open = last;
ib_t tagend_name = ib_exact(tagend_open, WRAP_STR(tag_name.data));
ib_t tagend_close = ib_exact(tagend_name, SZ(">"));
+ ib_t tag_tail = ib_until(tagend_close, SZ("<"));
#undef return_
-#define return_ \
- ib_destroy(begin); \
- ib_destroy(tag_before); \
- ib_destroy(tag_open); \
- ib_destroy(tag_name); \
- ib_destroy(tag_name_empty); \
- ib_destroy(tag_close); \
- ib_destroy(tag_text); \
- ib_destroy(tagend_open); \
- ib_destroy(tagend_name); \
- ib_destroy(tagend_close); \
+#define return_ \
+ ib_destroy(begin); \
+ ib_destroy(tag_open); \
+ ib_destroy(tag_name); \
+ ib_destroy(tag_close); \
+ ib_destroy(tag_close_empty); \
+ ib_destroy(tag_text); \
+ ib_destroy(tagend_open); \
+ ib_destroy(tagend_name); \
+ ib_destroy(tagend_close); \
return
- if (tagend_close.status != KIT_OK) {
- res.status = KIT_ERROR_INTERNAL;
+ if (tag_tail.status != KIT_OK) {
+ res.last = tag_tail;
+
+ DA_DESTROY(res.xml.properties);
+ DA_DESTROY(res.xml.children);
return_ res;
}
@@ -81,15 +205,25 @@ kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is,
res.xml.text = tag_text.data;
memset(&tag_text.data, 0, sizeof tag_text.data);
- DA_INIT(res.xml.tail, 0, alloc);
- DA_INIT(res.xml.properties, 0, alloc);
- DA_INIT(res.xml.children, 0, alloc);
+ // move
+ res.xml.tail = tag_tail.data;
+ memset(&tag_tail.data, 0, sizeof tag_tail.data);
- res.status = KIT_OK;
+ res.last = tag_tail;
return_ res;
#undef return_
}
+kit_xml_parse_result_t kit_xml_parse(kit_is_handle_t is, // Public entry point: wrap the input stream and delegate to kit_xml_parse_buf_.
+ kit_allocator_t *alloc) {
+ kit_xml_intermediate_t im = kit_xml_parse_buf_(ib_wrap(is, alloc), // the wrapped buffer is not destroyed here; it is handed over to the helper
+ alloc);
+ kit_xml_parse_result_t res = { .status = im.last.status, // the status of the helper's last handle becomes the parse status
+ .xml = im.xml };
+ ib_destroy(im.last); // only the status was needed from the handle
+ return res;
+}
+
kit_xml_print_result_t kit_xml_print(kit_xml_t *xml,
kit_allocator_t *alloc) {
assert(xml != NULL);
diff --git a/source/tests/input_buffer.test.c b/source/tests/input_buffer.test.c
index 237d60d..89717d1 100644
--- a/source/tests/input_buffer.test.c
+++ b/source/tests/input_buffer.test.c
@@ -56,7 +56,7 @@ TEST("input buffer read twice") {
is_destroy(in);
}
-static i8 is_integer_(str_t const data) {
+static i8 is_integer_(str_t const data, void *_) {
for (ptrdiff_t i = 0; i < data.size; i++)
if (data.values[i] < '0' || data.values[i] > '9')
return 0;
@@ -69,7 +69,7 @@ TEST("input buffer read integer once") {
is_handle_t in = IS_WRAP_STRING(text);
ib_t first = IB_WRAP(in);
- ib_t second = ib_while(first, is_integer_);
+ ib_t second = ib_while(first, is_integer_, NULL);
REQUIRE(second.status == KIT_OK);
REQUIRE(second.data.size == 5);
@@ -87,9 +87,9 @@ TEST("input buffer read integer twice") {
is_handle_t in = IS_WRAP_STRING(text);
ib_t first = IB_WRAP(in);
- ib_t second = ib_while(first, is_integer_);
+ ib_t second = ib_while(first, is_integer_, NULL);
ib_t third = ib_read(second, 1);
- ib_t fourth = ib_while(third, is_integer_);
+ ib_t fourth = ib_while(third, is_integer_, NULL);
REQUIRE(fourth.status == KIT_OK);
REQUIRE(second.data.size == 3);
diff --git a/source/tests/xml.test.c b/source/tests/xml.test.c
index 7d2151c..52ff7c0 100644
--- a/source/tests/xml.test.c
+++ b/source/tests/xml.test.c
@@ -58,4 +58,89 @@ TEST("xml parse empty tag") {
is_destroy(is);
}
+TEST("xml parse tail") { // Text following the closing tag is expected in xml.tail.
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo></foo> bar"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+ // The result is inspected and destroyed only when parsing succeeded.
+ if (res.status == KIT_OK) {
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse empty tail") { // Text following a self-closing tag is expected in xml.tail.
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo /> bar"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+ // The result is inspected and destroyed only when parsing succeeded.
+ if (res.status == KIT_OK) {
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE(AR_EQUAL(res.xml.tail, SZ(" bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse property") { // A single name="value" pair on an opening tag is expected in xml.properties.
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo bar=\"42\"></foo>"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+ // The result is inspected and destroyed only when parsing succeeded.
+ if (res.status == KIT_OK) {
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE_EQ(res.xml.properties.size, 1);
+ if (res.xml.properties.size == 1) { // guard the index below even if the size check above failed
+ REQUIRE(AR_EQUAL(res.xml.properties.values[0].name, SZ("bar")));
+ REQUIRE(AR_EQUAL(res.xml.properties.values[0].value, SZ("42")));
+ }
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse empty property") { // A single name="value" pair on a self-closing tag is expected in xml.properties.
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo bar=\"42\" />"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+ // The result is inspected and destroyed only when parsing succeeded.
+ if (res.status == KIT_OK) {
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE_EQ(res.xml.properties.size, 1);
+ if (res.xml.properties.size == 1) { // guard the index below even if the size check above failed
+ REQUIRE(AR_EQUAL(res.xml.properties.values[0].name, SZ("bar")));
+ REQUIRE(AR_EQUAL(res.xml.properties.values[0].value, SZ("42")));
+ }
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
+TEST("xml parse child") { // An element nested inside another is expected in xml.children.
+ is_handle_t is = IS_WRAP_STRING(SZ("<foo><bar></bar></foo>"));
+ xml_parse_result_t res = xml_parse(is, NULL);
+
+ REQUIRE_EQ(res.status, KIT_OK);
+ // The result is inspected and destroyed only when parsing succeeded.
+ if (res.status == KIT_OK) {
+ REQUIRE(AR_EQUAL(res.xml.tag, SZ("foo")));
+ REQUIRE_EQ(res.xml.children.size, 1);
+ if (res.xml.children.size == 1) // guard the index below even if the size check above failed
+ REQUIRE(AR_EQUAL(res.xml.children.values[0].tag, SZ("bar")));
+ xml_destroy(&res.xml);
+ }
+
+ is_destroy(is);
+}
+
#undef KIT_TEST_FILE