#if 0 SRC=${0##*/} BIN=${SRC%.*} gcc -o $BIN.tmp $SRC && ./$BIN.tmp $@ && rm $BIN.tmp exit $? #endif // ================================================================ // // Binary code generation compiler backend // // Qualities // // - Single source file // - Simple and flexible API // - No external dependencies // - No configuration required // - No dynamic memory management // // TODO // // - ELF + x86_64 executable // - x86_64 object file // - Linking libraries // - Effective entity allocation // - Linked lists for large entities // - Sea of Nodes // - Optimization layers // - Multithreading // - COFF, PE, OMF, Mach-O // - i386, RISC-V, ARM // // ================================================================ // // Basic declarations // // ================================================================ #include #include #include #include #include #include typedef signed char i8; typedef signed short i16; typedef signed int i32; typedef signed long long i64; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef float f32; typedef double f64; typedef signed char b8; typedef int b32; typedef char c8; // ================================================================ // // Intermediate representation // // ================================================================ // Declarations // enum { // For indices UNDEFINED = -1, // Formats // FORMAT_ELF = 1, FORMAT_COFF, FORMAT_PE, FORMAT_OMF, FORMAT_MATCH_O, // Architecture // ARCH_RISC_V = 64, ARCH_I386, ARCH_X86_64, ARCH_ARM, // Sea of Nodes flow type // FLOW_DATA = 0, FLOW_CONTROL, // Semantic node operations // OP_I64 = 0, OP_RET, // Object file UNIT_OBJ = 0, // Executable UNIT_EXE, // Entity types // TYPE_NODE = 0, TYPE_PROC, TYPE_UNIT, // Limits // // NOTE // // All limits can be exceeded using the linked list of entities // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`. // MAX_LITERAL_SIZE = 400, MAX_NAME_SIZE = 128, MAX_PROC_COUNT = 80, MAX_NODE_COUNT = 60, MAX_ENTITY_COUNT = 16384, }; // A semantic node is an operation with optional data // and possible references to other nodes. // typedef struct { i16 op; i8 flow; union { u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal i64 lit_int; // integer literal i64 ref_node[4]; // references to other nodes }; } node_t; // A procedure is a collection of semantic nodes // and has a string name. // typedef struct { i64 name_size; c8 name[MAX_NAME_SIZE]; i64 node_count; i64 nodes[MAX_NODE_COUNT]; } proc_t; // A compilation unit is a collection of procedures. // typedef struct { i16 type; i64 entry_point; i64 proc_count; i64 procs[MAX_PROC_COUNT]; } unit_t; // An entity can be any of: // - `node_t` // - `proc_t` // - `unit_t` // // Every entity can be referenced by it's unique index // in the entity pool. // // If the entity's data doesn't fit in one entity, tail is // an index that leads to the entity with the rest of the // data, forming a linked list. // typedef struct { b8 is_enabled; i16 type; i64 tail; union { node_t node; proc_t proc; unit_t unit; }; } entity_t; // Pool, a collection of all entities. // typedef struct { i64 entity_count; entity_t entities[MAX_ENTITY_COUNT]; } entity_pool_t; // IR building procs // i64 pool_entity_add(entity_pool_t *pool, entity_t data) { assert(pool != NULL); assert(pool->entity_count < MAX_ENTITY_COUNT); i64 id = pool->entity_count++; data.is_enabled = 1, pool->entities[id] = data; return id; } void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type) { assert(pool != NULL); assert(pool->entities[entity].is_enabled); assert(pool->entities[entity].type == type); pool->entities[entity].is_enabled = 1; } i64 node_init(entity_pool_t *pool, node_t data) { return pool_entity_add(pool, (entity_t) { .type = TYPE_NODE, .tail = UNDEFINED, .node = data, }); } void node_destroy(entity_pool_t *pool, i64 node) { pool_entity_remove(pool, node, TYPE_NODE); } i64 node_op_i64(entity_pool_t *pool, i64 value) { return node_init(pool, (node_t) { .op = OP_I64, .flow = FLOW_DATA, .lit_int = value, }); } i64 node_op_ret(entity_pool_t *pool, i64 node_return_value) { return node_init(pool, (node_t) { .op = OP_RET, .flow = FLOW_CONTROL, .ref_node = node_return_value, }); } i64 proc_init(entity_pool_t *pool) { return pool_entity_add(pool, (entity_t) { .type = TYPE_PROC, .tail = UNDEFINED, }); } void proc_destroy(entity_pool_t *pool, i64 proc) { pool_entity_remove(pool, proc, TYPE_PROC); } void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name) { assert(pool != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); // TODO // Implement large entities. assert(name_size <= MAX_NAME_SIZE); assert(name_size > 0); proc_t *p = &pool->entities[proc].proc; p->name_size = name_size; memcpy(p->name, name, name_size); } void proc_node_add(entity_pool_t *pool, i64 proc, i64 node) { assert(pool != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); assert(pool->entities[node].is_enabled); assert(pool->entities[node].type == TYPE_NODE); proc_t *p = &pool->entities[proc].proc; // TODO // Implement large entities. assert(p->node_count < MAX_NODE_COUNT); p->nodes[p->node_count++] = node; } void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node) { assert(pool != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); assert(pool->entities[node].type == TYPE_NODE); pool->entities[proc].proc.nodes[node] = UNDEFINED; } i64 unit_init(entity_pool_t *pool) { return pool_entity_add(pool, (entity_t) { .type = TYPE_UNIT, .tail = UNDEFINED, .unit = (unit_t) { .entry_point = UNDEFINED, } }); } void unit_destroy(entity_pool_t *pool, i64 unit) { pool_entity_remove(pool, unit, TYPE_UNIT); } void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); unit_t *u = &pool->entities[unit].unit; // TODO // Implement large entities. assert(u->proc_count < MAX_PROC_COUNT); u->procs[u->proc_count++] = proc; } void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[proc].type == TYPE_PROC); pool->entities[unit].unit.procs[proc] = UNDEFINED; } void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc) { assert(pool != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[entry_point_proc].type == TYPE_PROC); pool->entities[unit].unit.entry_point = entry_point_proc; } // ================================================================ // // Code generation proc // // ================================================================ void unit_write(entity_pool_t *pool, i64 unit, i16 target, FILE *out) { assert(pool != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].unit.entry_point != UNDEFINED); assert(out != NULL); assert(target == (FORMAT_ELF | ARCH_X86_64)); printf("Writing ELF x86_64 executable...\n"); u16 ehs = 64; u16 shs = 0; u16 phs = 56; u64 align = 8; u8 code[16] = { 0xb8, // mov rax 0x3c, 0, 0, 0, // 60 // exit 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0 0x0f, 0x05, // syscall }; u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1)); u64 code_size = sizeof code; assert((code_offset % align) == 0); assert((code_size % align) == 0); u64 entry = 0x400000 + ehs + phs; // ELF header // fwrite( (u8[4]) { 0x7f, 'E', 'L', 'F' }, 1, 4, out); // magic fwrite(&(u8) { 2 }, 1, 1, out); // elf64 fwrite(&(u8) { 1 }, 1, 1, out); // 2's complement, little endian fwrite(&(u8) { 1 }, 1, 1, out); // current version fwrite(&(u8) { 0 }, 1, 1, out); // SysV fwrite(&(u8) { 0 }, 1, 1, out); // ABI version fwrite( (u8[7]) { 0 }, 1, 7, out); // padding fwrite(&(u16) { 2 }, 2, 1, out); // executable fwrite(&(u16) { 62 }, 2, 1, out); // x86_64 fwrite(&(u32) { 1 }, 4, 1, out); // current version fwrite(&(u64) { entry }, 8, 1, out); // entry point address fwrite(&(u64) { ehs }, 8, 1, out); // program header offset fwrite(&(u64) { 0 }, 8, 1, out); // section header offset fwrite(&(u32) { 0 }, 4, 1, out); // flags fwrite(&(u16) { ehs }, 2, 1, out); // ELF header size fwrite(&(u16) { phs }, 2, 1, out); // program header size fwrite(&(u16) { 1 }, 2, 1, out); // program header count fwrite(&(u16) { shs }, 2, 1, out); // section header size fwrite(&(u16) { 0 }, 2, 1, out); // section header count fwrite(&(u16) { 0 }, 2, 1, out); // string table section header index // Program header // fwrite(&(u32) { 1 }, 4, 1, out); // type (PT_LOAD) fwrite(&(u32) { 5 }, 4, 1, out); // flags (PF_X | PF_R) fwrite(&(u64) { code_offset }, 8, 1, out); // offset fwrite(&(u64) { entry }, 8, 1, out); // vaddr fwrite(&(u64) { entry }, 8, 1, out); // paddr fwrite(&(u64) { code_size }, 8, 1, out); // filesz fwrite(&(u64) { code_size }, 8, 1, out); // memsz fwrite(&(u64) { 8 }, 8, 1, out); // align // Code // for (i64 i = code_offset - ehs - phs; i > 0; --i) fwrite(&(u8) { 0 }, 1, 1, out); fwrite(code, 1, code_size, out); } // ================================================================ // // Helpers // // ================================================================ // Global state // static entity_pool_t g_pool = { 0 }; // Handy procedures // i64 n_i64(i64 value) { return node_op_i64(&g_pool, value); } i64 n_ret(i64 node_return_value) { return node_op_ret(&g_pool, node_return_value); } i64 p_new(c8 const *name) { i64 p = proc_init(&g_pool); proc_set_name(&g_pool, p, strlen(name), name); return p; } void p_add(i64 proc, i64 node) { proc_node_add(&g_pool, proc, node); } i64 u_new() { return unit_init(&g_pool); } void u_add(i64 unit, i64 proc) { unit_proc_add(&g_pool, unit, proc); } void u_entry_point(i64 unit, i64 proc) { unit_set_entry_point(&g_pool, unit, proc); } void u_elf_x86_64(i64 unit, c8 const *output_file_name) { FILE *f = fopen(output_file_name, "wb"); assert(f != NULL); unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f); fclose(f); chmod(output_file_name, 0775); } // ================================================================ // // Example // // ================================================================ int main(int argc, char **argv) { printf("node - %d bytes\n", (i32) sizeof(node_t)); printf("proc - %d bytes\n", (i32) sizeof(proc_t)); printf("unit - %d bytes\n", (i32) sizeof(unit_t)); i64 main = p_new("main"); i64 n0 = n_i64(42); p_add(main, n0); p_add(main, n_ret(n0)); i64 u = u_new(); u_add(u, main); u_entry_point(u, main); u_elf_x86_64(u, "test_foo"); printf("\nBye!\n"); return 0; }