#include "xml.h"

#include "input_buffer.h"
#include <assert.h>

//  Result of parsing one nesting level of XML, as produced by
//  kit_xml_parse_buf_.
typedef struct {
  //  Token reached when parsing stopped; its `status` field carries
  //  the overall parsing status.
  ib_token_t    last;
  //  Unescaped text that precedes the first tag of this level.
  str_builder_t text;
  //  Tags parsed at this level, in the order they were read.
  kit_da_xml_t  tags;
} kit_xml_intermediate_t;

//  Allocate `dst` with `alloc` and fill it with a copy of `str` in
//  which XML references are replaced by the characters they denote.
//
//  Supported references:
//    - predefined entities  &lt;  &gt;  &amp;  &quot;  &apos;
//    - hexadecimal character references  &#xH..;  (up to 8 digits)
//    - decimal character references      &#D..;   (up to 19 digits)
//
//  A numeric reference must denote a value in 1..255; it is stored as
//  a single byte (no UTF-8 encoding yet).
//
//  Returns:
//    KIT_OK                   on success;
//    KIT_ERROR_INTERNAL       if `dst` is NULL or `str` is invalid;
//    KIT_ERROR_OUT_OF_MEMORY  if `dst` could not be allocated;
//    KIT_PARSING_FAILED       on an unterminated, unknown, malformed
//                             or out-of-range reference; `dst` has
//                             been destroyed in this case.
static s32 kit_xml_alloc_and_unescape_(str_builder_t *dst, str_t str,
                                       kit_allocator_t *alloc) {
  assert(dst != NULL);
  assert(str.size == 0 || str.values != NULL);

  if (dst == NULL)
    return KIT_ERROR_INTERNAL;
  if (str.size != 0 && str.values == NULL)
    return KIT_ERROR_INTERNAL;

  //  Every reference is at least as long as the byte it produces, so
  //  a buffer of the source size is always large enough.
  DA_INIT(*dst, str.size, alloc);

  if (dst->size != str.size)
    return KIT_ERROR_OUT_OF_MEMORY;

  //  From here on `dst->size` counts the bytes written so far.
  dst->size = 0;

  for (i64 i = 0; i < str.size; i++) {
    if (str.values[i] != '&') {
      dst->values[dst->size++] = str.values[i];
      continue;
    }

    //  `n` is the offset of the terminating ';' relative to the '&'.
    i64 n = 1;
    while (i + n < str.size && str.values[i + n] != ';') n++;
    if (i + n >= str.size) {
      DA_DESTROY(*dst);
      return KIT_PARSING_FAILED;
    }

    if (n == 3 && memcmp(str.values + i, "&lt;", 4) == 0)
      dst->values[dst->size++] = '<';
    else if (n == 3 && memcmp(str.values + i, "&gt;", 4) == 0)
      dst->values[dst->size++] = '>';
    else if (n == 4 && memcmp(str.values + i, "&amp;", 5) == 0)
      dst->values[dst->size++] = '&';
    else if (n == 5 && memcmp(str.values + i, "&quot;", 6) == 0)
      dst->values[dst->size++] = '"';
    else if (n == 5 && memcmp(str.values + i, "&apos;", 6) == 0)
      dst->values[dst->size++] = '\'';
    else if (n - 3 <= 8 && str.values[i + 1] == '#' &&
             str.values[i + 2] == 'x') {
      //  hex encoding
      //
      //  The digits occupy offsets 3 .. n - 1, so there are n - 3 of
      //  them; the ';' at offset n is not part of the number.  At
      //  most 8 digits are accepted, which can not overflow `x`.

      i64 digits = n - 3;
      u64 x      = 0;

      for (i64 k = 0; k < digits; k++) {
        c8 c = str.values[i + 3 + k];
        //  One hexadecimal digit carries 4 bits.
        x <<= 4;
        if (c >= '0' && c <= '9')
          x |= (u64) (c - '0');
        else if (c >= 'a' && c <= 'f')
          x |= (u64) (10 + (c - 'a'));
        else if (c >= 'A' && c <= 'F')
          x |= (u64) (10 + (c - 'A'));
        else {
          //  Not a hex digit: force the failure path below.
          x = 0;
          break;
        }
      }

      if (x == 0 || x > 255u) {
        //  TODO
        //  UTF-8 encoding

        DA_DESTROY(*dst);
        return KIT_PARSING_FAILED;
      }

      dst->values[dst->size++] = (c8) x;
    } else if (n - 1 <= 20 && str.values[i + 1] == '#') {
      //  dec encoding
      //
      //  The digits occupy offsets 2 .. n - 1, so there are n - 2 of
      //  them.  The length check above allows at most 19 digits,
      //  which can not overflow `x`.

      i64 digits = n - 2;
      u64 x      = 0;

      for (i64 k = 0; k < digits; k++) {
        c8 c = str.values[i + 2 + k];
        x *= 10;
        if (c >= '0' && c <= '9')
          x += (u64) (c - '0');
        else {
          //  Not a decimal digit: force the failure path below.
          x = 0;
          break;
        }
      }

      if (x == 0 || x > 255u) {
        //  TODO
        //  UTF-8 encoding

        DA_DESTROY(*dst);
        return KIT_PARSING_FAILED;
      }

      dst->values[dst->size++] = (c8) x;
    } else {
      DA_DESTROY(*dst);
      return KIT_PARSING_FAILED;
    }

    //  Skip the body of the reference; the loop increment then steps
    //  over the terminating ';'.
    i += n;
  }

  return KIT_OK;
}

//  Collect raw character data starting at `begin` into `dst`.
//
//  `dst` is resized to hold the text read up to the first "<".  While
//  the input continues with a "<!--" ... "-->" comment, the comment is
//  stepped over and the text that follows it (up to the next "<") is
//  appended to `dst`.
//
//  Returns the last token read.  KIT_ERROR_OUT_OF_MEMORY is added to
//  its status when `dst` could not be resized.
static ib_token_t kit_xml_parse_text_(ib_token_t     begin,
                                      str_builder_t *dst) {
  ib_token_t cursor = ib_until(begin, SZ("<"));

  DA_RESIZE(*dst, cursor.size);

  assert(dst->size == cursor.size);
  if (dst->size != cursor.size) {
    cursor.status |= KIT_ERROR_OUT_OF_MEMORY;
  } else if (cursor.size > 0) {
    memcpy(dst->values, ib_str(cursor).values, cursor.size);
  }

  ib_token_t opening = ib_exact(cursor, SZ("<!--"));

  while (opening.status == KIT_OK) {
    ib_token_t body    = ib_until(opening, SZ("-->"));
    ib_token_t closing = ib_exact(body, SZ("-->"));
    ib_token_t chunk   = ib_until(closing, SZ("<"));

    if (chunk.status == KIT_OK && chunk.size > 0) {
      i64 offset = dst->size;
      DA_RESIZE(*dst, offset + chunk.size);

      assert(dst->size == offset + chunk.size);
      if (dst->size != offset + chunk.size) {
        chunk.status |= KIT_ERROR_OUT_OF_MEMORY;
      } else {
        str_t piece = ib_str(chunk);
        memcpy(dst->values + offset, piece.values, piece.size);
      }
    }

    //  Continue from the text that followed the comment.
    cursor  = chunk;
    opening = ib_exact(cursor, SZ("<!--"));
  }

  return cursor;
}

//  Parse a quoted string at `begin`, delimited either by a pair of
//  double quotes or by a pair of apostrophes.
//
//  The token covering the characters between the delimiters is stored
//  in `*value`.  Returns the token for the closing delimiter, or
//  `begin` with KIT_ERROR_INTERNAL added to its status when `value`
//  is NULL.
static ib_token_t kit_xml_parse_string_(ib_token_t  begin,
                                        ib_token_t *value) {
  assert(value != NULL);
  if (value == NULL) {
    begin.status |= KIT_ERROR_INTERNAL;
    return begin;
  }

  ib_token_t double_quote = ib_exact(begin, SZ("\""));
  ib_token_t single_quote = ib_exact(begin, SZ("'"));

  //  Prefer the double quote; otherwise fall back to the apostrophe
  //  token, whatever its status is.
  ib_token_t delimiter = single_quote;
  if (double_quote.status == KIT_OK)
    delimiter = double_quote;

  //  The string runs up to the next occurrence of the same delimiter.
  *value = ib_until(delimiter, ib_str(delimiter));

  return ib_exact(*value, ib_str(delimiter));
}

//  Parse one nesting level of XML starting at `begin`.
//
//  Reads the leading text, then parses tags one after another until
//  the input continues with "</" (left for the caller to match) or
//  does not continue with "<" at all.  Each tag may be a declaration
//  ("<?" ... "?>"), an empty tag ("/>") or a tag with content, whose
//  content is parsed by a recursive call.
//
//  The result holds the unescaped leading text in `text`, the parsed
//  tags in `tags` and the final token in `last`.  When `last.status`
//  is not KIT_OK, `text` and `tags` have been destroyed and the result
//  owns no memory.
static kit_xml_intermediate_t kit_xml_parse_buf_(
    ib_token_t begin, kit_allocator_t *alloc) {
  kit_xml_intermediate_t res;
  memset(&res, 0, sizeof res);

  ib_token_t last, spaces;
  memset(&last, 0, sizeof last);
  memset(&spaces, 0, sizeof spaces);

  //  Scratch buffers for raw (still escaped) text.  Both are released
  //  before returning.
  str_builder_t tag_text_string;
  str_builder_t tag_tail_string;
  DA_INIT(tag_text_string, 0, alloc);
  DA_INIT(tag_tail_string, 0, alloc);

  //  Text that precedes the first tag of this level.
  ib_token_t tag_text = kit_xml_parse_text_(begin, &tag_text_string);
  last                = tag_text;

  DA_INIT(res.tags, 0, alloc);

  for (;;) {
    //  A closing tag ends this level; the caller matches it.
    ib_token_t tagend_open = ib_exact(last, SZ("</"));
    if (tagend_open.status == KIT_OK)
      break;

    ib_token_t tag_open = ib_exact(last, SZ("<"));

    //  No more tags (or an earlier error is carried by `last`).
    if (tag_open.status != KIT_OK)
      break;

    xml_t tag;
    memset(&tag, 0, sizeof tag);

    ib_token_t decl_open = ib_exact(tag_open, SZ("?"));

    if (decl_open.status == KIT_OK) {
      tag.is_declaration = 1;
      last               = decl_open;
    } else
      last = tag_open;

    spaces              = ib_any(last, SZ(" \t\r\n"));
    ib_token_t tag_name = ib_none(spaces, SZ(" \t\r\n/>"));

    DA_INIT(tag.properties, 0, alloc);

    last = tag_name;

    //  Properties:  name = "value"  or  name = 'value'
    for (;;) {
      spaces              = ib_any(last, SZ(" \t\r\n"));
      ib_token_t property = ib_none(spaces, SZ(" \t\r\n=?/>"));

      if (property.status != KIT_OK || property.size == 0)
        break;

      spaces            = ib_any(property, SZ(" \t\r\n"));
      ib_token_t equals = ib_exact(spaces, SZ("="));
      spaces            = ib_any(equals, SZ(" \t\r\n"));

      ib_token_t value;
      last = kit_xml_parse_string_(spaces, &value);

      if (last.status == KIT_OK) {
        i64 n = tag.properties.size;
        DA_RESIZE(tag.properties, n + 1);

        assert(tag.properties.size == n + 1);
        if (tag.properties.size != n + 1) {
          last.status |= KIT_ERROR_OUT_OF_MEMORY;
          //  NOTE(review): this releases the array but not the
          //  name/value strings of properties stored earlier --
          //  confirm against DA_RESIZE failure semantics.
          DA_DESTROY(tag.properties);
        } else {
          last.status |= kit_xml_alloc_and_unescape_(
              &tag.properties.values[n].name, ib_str(property),
              alloc);
          last.status |= kit_xml_alloc_and_unescape_(
              &tag.properties.values[n].value, ib_str(value), alloc);
        }
      }
    }

    //  `spaces` is the last whitespace token read by the property
    //  loop; the end of the opening tag is matched right after it.
    if (tag.is_declaration) {
      ib_token_t tag_decl_close = ib_exact(spaces, SZ("?>"));

      last = tag_decl_close;

      DA_INIT(tag.text, 0, alloc);
      DA_INIT(tag.children, 0, alloc);
    } else {
      ib_token_t tag_close       = ib_exact(spaces, SZ(">"));
      ib_token_t tag_close_empty = ib_exact(spaces, SZ("/>"));

      if (tag_close.status == KIT_OK) {
        //  Tag with content: parse the nested level, then expect the
        //  matching  </ name >.
        kit_xml_intermediate_t im = kit_xml_parse_buf_(tag_close,
                                                       alloc);

        //  The tag takes ownership of the nested text and children.
        tag.text     = im.text;
        tag.children = im.tags;

        tagend_open             = ib_exact(im.last, SZ("</"));
        spaces                  = ib_any(tagend_open, SZ(" \t\r\n"));
        ib_token_t tagend_name  = ib_exact(spaces, ib_str(tag_name));
        spaces                  = ib_any(tagend_name, SZ(" \t\r\n"));
        ib_token_t tagend_close = ib_exact(spaces, SZ(">"));

        last = tagend_close;

      } else if (tag_close_empty.status == KIT_OK) {
        last = tag_close_empty;

        DA_INIT(tag.text, 0, alloc);
        DA_INIT(tag.children, 0, alloc);
      } else
        last.status |= KIT_PARSING_FAILED;
    }

    //  Text between the end of this tag and the next "<".
    ib_token_t tag_tail = kit_xml_parse_text_(last, &tag_tail_string);

    last = tag_tail;

    if (last.status == KIT_OK) {
      i64 n = res.tags.size;
      DA_RESIZE(res.tags, n + 1);

      assert(res.tags.size == n + 1);
      if (res.tags.size != n + 1) {
        last.status |= KIT_ERROR_OUT_OF_MEMORY;
        xml_destroy(&tag);
      } else {
        last.status |= kit_xml_alloc_and_unescape_(
            &tag.tag, ib_str(tag_name), alloc);
        last.status |= kit_xml_alloc_and_unescape_(
            &tag.tail, WRAP_STR(tag_tail_string), alloc);

        //  Stored even if unescaping failed; the cleanup below then
        //  destroys it together with the other tags.
        res.tags.values[n] = tag;
      }
    } else
      xml_destroy(&tag);
  }

  //  Unescape the leading text only when everything parsed cleanly.
  if (last.status == KIT_OK)
    last.status |= kit_xml_alloc_and_unescape_(
        &res.text, WRAP_STR(tag_text_string), alloc);

  //  Release everything on any failure, including a failed unescape
  //  of the leading text; otherwise the tags parsed so far would be
  //  leaked by callers that just return on error.  Relies on
  //  DA_DESTROY being safe to repeat on an already destroyed array,
  //  as the other cleanup paths of this function already do.
  if (last.status != KIT_OK) {
    for (i64 i = 0; i < res.tags.size; i++)
      xml_destroy(res.tags.values + i);
    DA_DESTROY(res.text);
    DA_DESTROY(res.tags);
  }

  DA_DESTROY(tag_text_string);
  DA_DESTROY(tag_tail_string);

  res.last = last;
  return res;
}

//  Parse an XML document from the input stream `is`.
//
//  `alloc` is used for the input buffer and for the resulting tree.
//  On failure the result carries the parsing status and a zeroed
//  `xml`.  On success `xml` is either the single parsed tag (when
//  there is exactly one tag and no leading text) or a node with empty
//  tag, tail and properties that holds the leading text and all
//  parsed tags as its children.
kit_xml_parse_result_t kit_xml_parse(is_handle_t      is,
                                     kit_allocator_t *alloc) {
  input_buffer_t         buffer = ib_wrap(is, alloc);
  kit_xml_intermediate_t parsed = kit_xml_parse_buf_(
      ib_token(&buffer), alloc);

  kit_xml_parse_result_t result;
  memset(&result, 0, sizeof result);

  result.status = parsed.last.status;

  if (result.status == KIT_OK) {
    if (parsed.text.size == 0 && parsed.tags.size == 1) {
      //  Single root tag: return it directly and drop the containers.
      result.xml = parsed.tags.values[0];
      DA_DESTROY(parsed.text);
      DA_DESTROY(parsed.tags);
    } else {
      //  Otherwise wrap the text and tags into a node of their own.
      DA_INIT(result.xml.tag, 0, alloc);
      DA_INIT(result.xml.tail, 0, alloc);
      DA_INIT(result.xml.properties, 0, alloc);

      result.xml.text     = parsed.text;
      result.xml.children = parsed.tags;
    }
  }

  ib_destroy(&buffer);
  return result;
}

//  Serialize `xml` to text.
//
//  TODO
//  Not implemented yet: the result is zeroed and its status is always
//  KIT_ERROR_NOT_IMPLEMENTED.  `alloc` is not used.
kit_xml_text_t kit_xml_print(kit_xml_t *xml, kit_allocator_t *alloc) {
  (void) alloc;

  assert(xml != NULL);

  xml_text_t out;
  memset(&out, 0, sizeof out);
  out.status = KIT_ERROR_NOT_IMPLEMENTED;

  return out;
}

//  Append to `buf` the text of `xml` followed, for every child in
//  order, by the child's full text (recursively) and the child's
//  tail.
//
//  Returns KIT_OK, or KIT_ERROR_OUT_OF_MEMORY when `buf` could not be
//  grown.
static s32 kit_xml_append_text_(str_builder_t *buf, xml_t *xml) {
  assert(buf != NULL);
  assert(xml != NULL);

  //  The node's own text goes first.
  i64 offset = buf->size;
  DA_RESIZE(*buf, offset + xml->text.size);

  assert(buf->size == offset + xml->text.size);
  if (buf->size != offset + xml->text.size)
    return KIT_ERROR_OUT_OF_MEMORY;

  if (xml->text.size > 0)
    memcpy(buf->values + offset, xml->text.values, xml->text.size);

  for (i64 k = 0; k < xml->children.size; k++) {
    xml_t *child = xml->children.values + k;

    s32 status = kit_xml_append_text_(buf, child);
    if (status != KIT_OK)
      return status;

    str_t tail = WRAP_STR(child->tail);

    if (tail.size > 0) {
      offset = buf->size;
      DA_RESIZE(*buf, offset + tail.size);

      assert(buf->size == offset + tail.size);
      if (buf->size != offset + tail.size)
        return KIT_ERROR_OUT_OF_MEMORY;

      memcpy(buf->values + offset, tail.values, tail.size);
    }
  }

  return KIT_OK;
}

//  Build the concatenated text of `xml` and all of its descendants.
//
//  The text buffer of the result is created with `alloc`.  The status
//  is KIT_ERROR_INVALID_ARGUMENT when `xml` is NULL, otherwise the
//  status reported by kit_xml_append_text_.
kit_xml_text_t kit_xml_full_text(kit_xml_t       *xml,
                                 kit_allocator_t *alloc) {
  kit_xml_text_t out;
  out.status = KIT_OK;
  DA_INIT(out.text, 0, alloc);

  if (xml == NULL) {
    out.status = KIT_ERROR_INVALID_ARGUMENT;
    return out;
  }

  out.status = kit_xml_append_text_(&out.text, xml);
  return out;
}

//  Check whether `xml` has a property called `name`.
//
//  Returns 1 if such a property exists, 0 if it does not or if `xml`
//  is NULL.
b8 kit_xml_has_property(kit_xml_t *xml, kit_str_t name) {
  assert(xml != NULL);
  if (xml == NULL)
    return 0;

  //  Advance to the first property with a matching name, if any.
  i64 k = 0;
  while (k < xml->properties.size &&
         !(AR_EQUAL(xml->properties.values[k].name, name)))
    k++;

  if (k < xml->properties.size)
    return 1;

  return 0;
}

//  Look up the value of the property `name` of `xml`.
//
//  Returns a string that refers to the stored value of the first
//  property with a matching name.  A missing property trips an
//  assertion; when assertions are disabled, an empty string is
//  returned instead.  An empty string is also returned when `xml` is
//  NULL.
str_t kit_xml_property(kit_xml_t *xml, str_t name) {
  assert(xml != NULL);

  if (xml != NULL) {
    for (i64 k = 0; k < xml->properties.size; k++) {
      if (AR_EQUAL(xml->properties.values[k].name, name))
        return WRAP_STR(xml->properties.values[k].value);
    }

    //  The caller is expected to ask only for existing properties.
    assert(0);
  }

  return str(0, NULL);
}

//  Release everything owned by `xml`: the name and value of each
//  property, every child (recursively), the tag, text and tail
//  strings, and finally the property and children arrays.
//
//  Does nothing when `xml` is NULL (after tripping the assertion in
//  builds with assertions enabled).
void kit_xml_destroy(kit_xml_t *xml) {
  assert(xml != NULL);
  if (xml == NULL)
    return;

  i64 k;

  //  Property strings first, while the array is still alive.
  k = 0;
  while (k < xml->properties.size) {
    DA_DESTROY(xml->properties.values[k].name);
    DA_DESTROY(xml->properties.values[k].value);
    k++;
  }

  //  Then the whole subtree below this node.
  k = 0;
  while (k < xml->children.size) {
    kit_xml_destroy(xml->children.values + k);
    k++;
  }

  DA_DESTROY(xml->tag);
  DA_DESTROY(xml->text);
  DA_DESTROY(xml->tail);

  DA_DESTROY(xml->properties);
  DA_DESTROY(xml->children);
}