From 2654a133cb1d94a0505c478f90dfc608640716e9 Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Mon, 22 Jan 2024 13:02:28 +0100 Subject: xml: escaped chars --- source/kit/xml.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'source') diff --git a/source/kit/xml.c b/source/kit/xml.c index fbe4ec2..8d22bf3 100644 --- a/source/kit/xml.c +++ b/source/kit/xml.c @@ -46,7 +46,70 @@ static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str, dst->values[dst->size++] = '"'; else if (n == 5 && memcmp(str.values + i, "'", 6) == 0) dst->values[dst->size++] = '\''; - else { + else if (n - 2 <= 8 && str.values[i + 1] == '#' && + str.values[i + 2] == 'x') { + // hex encoding + // + + c8 buf[8]; + u64 x = 0; + + memcpy(buf, str.values + (i + 3), n - 2); + + for (i64 k = 0; k < n - 2; k++) { + c8 c = str.values[i + 3 + k]; + x <<= 8; + if (c >= '0' && c <= '9') + x |= (c - '0'); + else if (c >= 'a' && c <= 'f') + x |= 10 + (c - 'a'); + else if (c >= 'A' && c <= 'F') + x |= 10 + (c - 'A'); + else { + x = 0; + break; + } + } + + if (x == 0 || x > 255u) { + // TODO + // UTF-8 encoding + + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; + } + + dst->values[dst->size++] = (c8) x; + } else if (n - 1 <= 20 && str.values[i + 1] == '#') { + // dec encoding + // + + c8 buf[20]; + u64 x = 0; + + memcpy(buf, str.values + (i + 2), n - 2); + + for (i64 k = 0; k < n - 1; k++) { + c8 c = str.values[i + 2 + k]; + x *= 10; + if (c >= '0' && c <= '9') + x += (c - '0'); + else { + x = 0; + break; + } + } + + if (x == 0 || x > 255u) { + // TODO + // UTF-8 encoding + + DA_DESTROY(*dst); + return KIT_PARSING_FAILED; + } + + dst->values[dst->size++] = (c8) x; + } else { DA_DESTROY(*dst); return KIT_PARSING_FAILED; } -- cgit v1.2.3