From b76549bb7bb94832ed0c7a3985cd304ab38d6cd2 Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Tue, 11 Jun 2024 05:47:56 +0200 Subject: Add call and link API; Refactor --- bxgen.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 275 insertions(+), 46 deletions(-) (limited to 'bxgen.c') diff --git a/bxgen.c b/bxgen.c index 6c9acf0..26cb4d9 100755 --- a/bxgen.c +++ b/bxgen.c @@ -25,6 +25,12 @@ exit $? // - Easy cross-compilation // - Platform-independent host // +// Inspirations +// +// - Cuik https://github.com/RealNeGate/Cuik +// - tinycc https://repo.or.cz/w/tinycc.git +// - QBE https://c9x.me/compile/ +// // To-Do list // // - ELF + x86_64 executable @@ -33,7 +39,9 @@ exit $? // - Proper error handling // - Proper prefixes for identifiers // - Effective entity allocation -// - Linked lists for large entities +// - Hybrid-linked lists for large entities +// - Implicit procedure prototypes +// - Implicit exit after ret from entry point // - Static single-assignment // - Sea of Nodes // - Optimization layers @@ -41,7 +49,7 @@ exit $? 
// - Memory reallocation when necessary // - JIT // - COFF, PE, OMF, Mach-O -// - i386, RISC-V, ARM +// - i386, RISC-V, ARM, WebAssembly // - Built-in standard library // // Bugs @@ -122,14 +130,36 @@ enum { // DATA_I64 = 0, + CTRL_CALL, CTRL_RET, + // Calling conventions + + CONV_CDECL = 0, + CONV_STDCALL, + CONV_FASTCALL, + CONV_THISCALL, + + // Primitive data types + // + + TYPE_I32 = 0, + + // Unit types + // + + UNIT_CODE = 0, + UNIT_LIBRARY_OBJECT, + UNIT_LIBRARY_STATIC, + UNIT_LIBRARY_DYNAMIC, + // Entity types // - TYPE_NODE = 0, - TYPE_PROC, - TYPE_UNIT, + ENTITY_TAIL = 0, + ENTITY_NODE, + ENTITY_PROC, + ENTITY_UNIT, // Limits // @@ -140,9 +170,11 @@ enum { // MAX_LITERAL_SIZE = 400, - MAX_NAME_SIZE = 128, - MAX_PROC_COUNT = 80, + MAX_NAME_SIZE = 80, + MAX_PROC_COUNT = 40, MAX_NODE_COUNT = 60, + MAX_LINK_COUNT = 20, + MAX_ARG_COUNT = 20, MAX_ENTITY_COUNT = 16384, }; @@ -150,13 +182,38 @@ enum { // and possible references to other nodes. // +typedef struct { + i16 size; + i16 type; + i64 node; +} var_t; + +typedef struct { + i16 val_count; + var_t vals[MAX_ARG_COUNT]; +} ret_t; + +typedef struct { + // NOTE + // We may call a local procedure by its id, + // or a global procedure by name. 
+ + i16 convention; // can be implicitly retrieved from the procedure + i64 target_proc; + i64 target_name_size; + c8 target_name[MAX_NAME_SIZE]; + i64 arg_count; + var_t args[MAX_ARG_COUNT]; +} call_t; + typedef struct { i16 op; i64 index_in_proc; union { - u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal - i64 lit_int; // integer literal - i64 ref_node[4]; // references to other nodes + u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal + i64 lit_int; // integer literal + ret_t ret; + call_t call; }; } node_t; @@ -165,11 +222,13 @@ typedef struct { // typedef struct { + i16 convention; i64 name_size; c8 name[MAX_NAME_SIZE]; i64 node_count; i64 nodes[MAX_NODE_COUNT]; i64 ret_index; + i64 unit; i64 index_in_unit; } proc_t; @@ -177,9 +236,14 @@ typedef struct { // typedef struct { - i64 entry_point; + i16 type; + i64 entry_point_index; + i64 name_size; + c8 name[MAX_NAME_SIZE]; i64 proc_count; i64 procs[MAX_PROC_COUNT]; + i64 link_count; + i64 links[MAX_LINK_COUNT]; } unit_t; // An entity can be any of: @@ -203,6 +267,9 @@ typedef struct { node_t node; proc_t proc; unit_t unit; + c8 tail_chars[1]; + i64 tail_ids[1]; + var_t tail_vars[1]; }; } entity_t; @@ -233,31 +300,42 @@ void pool_remove(pool_t *pool, i64 entity, i16 type); i64 node_init(pool_t *pool, node_t data); void node_destroy(pool_t *pool, i64 node); -i64 node_op_i64(pool_t *pool, i64 value); -i64 node_op_ret(pool_t *pool, i64 node_return_value); +i64 node_data_i64(pool_t *pool, i64 value); +i64 node_ctrl_call(pool_t *pool, i16 convention, i64 target_proc, i64 arg_count, var_t *args); +i64 node_ctrl_call_by_name(pool_t *pool, i16 convention, i64 name_size, c8 const *name, i64 arg_count, var_t *args); +i64 node_ctrl_ret(pool_t *pool, i64 value_count, var_t *values); i64 proc_init(pool_t *pool); void proc_destroy(pool_t *pool, i64 proc); +void proc_set_convention(pool_t *pool, i64 proc, i16 convention); void proc_set_name(pool_t *pool, i64 proc, i64 name_size, c8 const *name); void 
proc_node_add(pool_t *pool, i64 proc, i64 node); void proc_node_remove(pool_t *pool, i64 proc, i64 node); -i64 unit_init(pool_t *pool); +i64 unit_init(pool_t *pool, i16 type); void unit_destroy(pool_t *pool, i64 unit); void unit_proc_add(pool_t *pool, i64 unit, i64 proc); void unit_proc_remove(pool_t *pool, i64 unit, i64 proc); +void unit_link_add(pool_t *pool, i64 unit, i64 link_unit); +void unit_link_remove(pool_t *pool, i64 unit, i64 link_unit); +void unit_set_name(pool_t *pool, i64 unit, i64 name_size, c8 const *name); void unit_set_entry_point(pool_t *pool, i64 unit, i64 entry_point_proc); void unit_write(pool_t *pool, i64 unit, u16 target, FILE *out); #ifndef DISABLE_HELPERS i64 n_i64(i64 value); -i64 n_ret(i64 node_return_value); +i64 n_call(i16 convention, i64 target_proc, i64 arg_count, var_t *args); +i64 n_call_by_name(i16 convention, c8 const *name, i64 arg_count, var_t *args); +i64 n_ret(i64 val_count, var_t *vals); i64 p_new(c8 const *name); void p_add(i64 proc, i64 node); i64 u_new(); void u_add(i64 unit, i64 proc); void u_entry_point(i64 unit, i64 proc); void u_elf_x86_64(i64 unit, c8 const *output_file_name); +void l_code(i64 unit, i64 link_unit); +void l_object(i64 unit, c8 const *object_library); +void l_static(i64 unit, c8 const *static_library); #endif #ifdef __cplusplus @@ -305,14 +383,14 @@ i64 node_init(pool_t *pool, node_t data) { data.index_in_proc = UNDEFINED; return pool_add(pool, (entity_t) { - .type = TYPE_NODE, + .type = ENTITY_NODE, .tail = UNDEFINED, .node = data, }); } void node_destroy(pool_t *pool, i64 node) { - pool_remove(pool, node, TYPE_NODE); + pool_remove(pool, node, ENTITY_NODE); } i64 node_data_i64(pool_t *pool, i64 value) { @@ -322,16 +400,61 @@ i64 node_data_i64(pool_t *pool, i64 value) { }); } -i64 node_ctrl_ret(pool_t *pool, i64 node_return_value) { +i64 node_ctrl_call(pool_t *pool, i16 convention, i64 target_proc, i64 arg_count, var_t *args) { + assert(arg_count <= MAX_ARG_COUNT); + + call_t call = { + .convention = 
convention, + .target_proc = target_proc, + .arg_count = arg_count, + }; + + if (arg_count > 0) + memcpy(call.args, args, arg_count * sizeof *args); + + return node_init(pool, (node_t) { + .op = CTRL_CALL, + .call = call, + }); +} + +i64 node_ctrl_call_by_name(pool_t *pool, i16 convention, i64 name_size, c8 const *name, i64 arg_count, var_t *args) { + assert(arg_count <= MAX_ARG_COUNT); + + call_t call = { + .convention = convention, + .target_name_size = name_size, + .arg_count = arg_count, + }; + + if (name_size > 0) + memcpy(call.target_name, name, name_size); + if (arg_count > 0) + memcpy(call.args, args, arg_count * sizeof *args); + + return node_init(pool, (node_t) { + .op = CTRL_CALL, + .call = call, + }); +} + +i64 node_ctrl_ret(pool_t *pool, i64 value_count, var_t *values) { + assert(value_count <= MAX_ARG_COUNT); + + ret_t ret = { .val_count = value_count, }; + + if (value_count > 0) + memcpy(ret.vals, values, value_count * sizeof *values); + return node_init(pool, (node_t) { - .op = CTRL_RET, - .ref_node = { node_return_value, 0, }, + .op = CTRL_RET, + .ret = ret, }); } i64 proc_init(pool_t *pool) { return pool_add(pool, (entity_t) { - .type = TYPE_PROC, + .type = ENTITY_PROC, .tail = UNDEFINED, .proc = (proc_t) { .ret_index = UNDEFINED, @@ -341,30 +464,41 @@ i64 proc_init(pool_t *pool) { } void proc_destroy(pool_t *pool, i64 proc) { - pool_remove(pool, proc, TYPE_PROC); + pool_remove(pool, proc, ENTITY_PROC); +} + +void proc_set_convention(pool_t *pool, i64 proc, i16 convention) { + assert(pool != NULL && pool->entities != NULL); + assert(pool->entities[proc].is_enabled); + assert(pool->entities[proc].type == ENTITY_PROC); + + pool->entities[proc].proc.convention = convention; } void proc_set_name(pool_t *pool, i64 proc, i64 name_size, c8 const *name) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[proc].type == ENTITY_PROC); // TODO // 
Implement large entities. + assert(name_size <= MAX_NAME_SIZE); - assert(name_size > 0); + assert(name_size >= 0); proc_t *p = &pool->entities[proc].proc; p->name_size = name_size; - memcpy(p->name, name, name_size); + + if (name_size > 0) + memcpy(p->name, name, name_size); } void proc_node_add(pool_t *pool, i64 proc, i64 node) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[proc].type == ENTITY_PROC); assert(pool->entities[node].is_enabled); - assert(pool->entities[node].type == TYPE_NODE); + assert(pool->entities[node].type == ENTITY_NODE); proc_t *p = &pool->entities[proc].proc; node_t *n = &pool->entities[node].node; @@ -395,8 +529,8 @@ void proc_node_add(pool_t *pool, i64 proc, i64 node) { void proc_node_remove(pool_t *pool, i64 proc, i64 node) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); - assert(pool->entities[node].type == TYPE_NODE); + assert(pool->entities[proc].type == ENTITY_PROC); + assert(pool->entities[node].type == ENTITY_NODE); proc_t *p = &pool->entities[proc].proc; node_t *n = &pool->entities[node].node; @@ -416,26 +550,27 @@ void proc_node_remove(pool_t *pool, i64 proc, i64 node) { n->index_in_proc = UNDEFINED; } -i64 unit_init(pool_t *pool) { +i64 unit_init(pool_t *pool, i16 type) { return pool_add(pool, (entity_t) { - .type = TYPE_UNIT, + .type = ENTITY_UNIT, .tail = UNDEFINED, .unit = (unit_t) { - .entry_point = UNDEFINED, + .type = type, + .entry_point_index = UNDEFINED, } }); } void unit_destroy(pool_t *pool, i64 unit) { - pool_remove(pool, unit, TYPE_UNIT); + pool_remove(pool, unit, ENTITY_UNIT); } void unit_proc_add(pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); + assert(pool->entities[unit].type == 
ENTITY_UNIT); assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[proc].type == ENTITY_PROC); unit_t *u = &pool->entities[unit].unit; proc_t *p = &pool->entities[proc].proc; @@ -457,8 +592,8 @@ void unit_proc_add(pool_t *pool, i64 unit, i64 proc) { void unit_proc_remove(pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); - assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[unit].type == ENTITY_UNIT); + assert(pool->entities[proc].type == ENTITY_PROC); unit_t *u = &pool->entities[unit].unit; proc_t *p = &pool->entities[proc].proc; @@ -469,17 +604,86 @@ void unit_proc_remove(pool_t *pool, i64 unit, i64 proc) { assert(p->index_in_unit != UNDEFINED); assert(u->procs[p->index_in_unit] == proc); + if (u->entry_point_index == p->index_in_unit) + u->entry_point_index = UNDEFINED; + u->procs[p->index_in_unit] = UNDEFINED; p->index_in_unit = UNDEFINED; } +void unit_link_add(pool_t *pool, i64 unit, i64 link_unit) { + assert(pool != NULL && pool->entities != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == ENTITY_UNIT); + assert(pool->entities[link_unit].is_enabled); + assert(pool->entities[link_unit].type == ENTITY_UNIT); + + unit_t *u = &pool->entities[unit].unit; + + for (i64 i = 0; i < u->link_count; ++i) + if (u->links[i] == link_unit) + return; + + assert(u->link_count < MAX_LINK_COUNT); + u->links[u->link_count++] = link_unit; +} + +void unit_link_remove(pool_t *pool, i64 unit, i64 link_unit) { + assert(pool != NULL && pool->entities != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == ENTITY_UNIT); + assert(pool->entities[link_unit].type == ENTITY_UNIT); + + unit_t *u = &pool->entities[unit].unit; + + for (i64 i = 0; i < u->link_count; ++i) + if (u->links[i] == link_unit) { + u->links[i] = 
UNDEFINED; + return; + } + + assert(0); +} + +void unit_set_name(pool_t *pool, i64 unit, i64 name_size, c8 const *name) { + assert(pool != NULL && pool->entities != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == ENTITY_UNIT); + + // TODO + // Implement large entities. + + assert(name_size <= MAX_NAME_SIZE); + assert(name_size >= 0); + + unit_t *u = &pool->entities[unit].unit; + u->name_size = name_size; + + if (name_size > 0) + memcpy(u->name, name, name_size); +} + void unit_set_entry_point(pool_t *pool, i64 unit, i64 entry_point_proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); - assert(pool->entities[entry_point_proc].type == TYPE_PROC); + assert(pool->entities[unit].type == ENTITY_UNIT); + + unit_t *u = &pool->entities[unit].unit; + + if (entry_point_proc == UNDEFINED) { + u->entry_point_index = UNDEFINED; + return; + } - pool->entities[unit].unit.entry_point = entry_point_proc; + assert(pool->entities[entry_point_proc].is_enabled); + assert(pool->entities[entry_point_proc].type == ENTITY_PROC); + + proc_t *p = &pool->entities[entry_point_proc].proc; + + assert(p->index_in_unit != UNDEFINED); + assert(u->procs[p->index_in_unit] == entry_point_proc); + + pool->entities[unit].unit.entry_point_index = p->index_in_unit; } // Code generation proc @@ -491,7 +695,7 @@ void unit_write(pool_t *pool, i64 unit, u16 target, FILE *out) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].unit.entry_point != UNDEFINED); + assert(pool->entities[unit].unit.entry_point_index != UNDEFINED); assert(out != NULL); assert(target == (FORMAT_ELF | ARCH_X86_64)); @@ -609,8 +813,16 @@ i64 n_i64(i64 value) { return node_data_i64(&g_pool, value); } -i64 n_ret(i64 node_return_value) { - return node_ctrl_ret(&g_pool, node_return_value); +i64 n_call(i16 convention, i64 target_proc, i64 
arg_count, var_t *args) { + return node_ctrl_call(&g_pool, convention, target_proc, arg_count, args); +} + +i64 n_call_by_name(i16 convention, c8 const *name, i64 arg_count, var_t *args) { + return node_ctrl_call_by_name(&g_pool, convention, strlen(name), name, arg_count, args); +} + +i64 n_ret(i64 val_count, var_t *vals) { + return node_ctrl_ret(&g_pool, val_count, vals); } i64 p_new(c8 const *name) { @@ -624,7 +836,7 @@ void p_add(i64 proc, i64 node) { } i64 u_new() { - return unit_init(&g_pool); + return unit_init(&g_pool, UNIT_CODE); } void u_add(i64 unit, i64 proc) { @@ -645,6 +857,22 @@ void u_elf_x86_64(i64 unit, c8 const *output_file_name) { fclose(f); } +void l_code(i64 unit, i64 link_unit) { + unit_link_add(&g_pool, unit, link_unit); +} + +void l_object(i64 unit, c8 const *object_library) { + i64 l = unit_init(&g_pool, UNIT_LIBRARY_OBJECT); + unit_set_name(&g_pool, l, strlen(object_library), object_library); + unit_link_add(&g_pool, unit, l); +} + +void l_static(i64 unit, c8 const *static_library) { + i64 l = unit_init(&g_pool, UNIT_LIBRARY_STATIC); + unit_set_name(&g_pool, l, strlen(static_library), static_library); + unit_link_add(&g_pool, unit, l); +} + #endif // ================================================================ @@ -653,7 +881,7 @@ void u_elf_x86_64(i64 unit, c8 const *output_file_name) { // // ================================================================ -#ifndef DISABLE_EXAMPLE +#if !defined(DISABLE_HELPERS) && !defined(DISABLE_EXAMPLE) int main(int argc, char **argv) { (void) argc; @@ -667,11 +895,12 @@ int main(int argc, char **argv) { i64 main = p_new("main"); i64 n0 = n_i64(42); p_add(main, n0); - p_add(main, n_ret(n0)); + p_add(main, n_ret(1, (var_t[]) { {.size = 4, .type = TYPE_I32, .node = n0, } })); i64 u = u_new(); u_add(u, main); u_entry_point(u, main); + l_static(u, "/lib/x86_64-linux-gnu/libc.a"); u_elf_x86_64(u, "test_foo"); printf("\nBye!\n"); -- cgit v1.2.3