diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2024-06-09 21:28:30 +0200 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2024-06-09 21:28:30 +0200 |
commit | cc1cd3f8f48dc506086715bb207dfbf175c11575 (patch) | |
tree | 8625d6ab13d4c5374b880b935bc1c1e15aca24ef /binary_code_generation.c | |
parent | 31f95ee266e507a3155a82899531851c7202f0a3 (diff) | |
download | bxgen-cc1cd3f8f48dc506086715bb207dfbf175c11575.zip |
Cleanup; docs
Diffstat (limited to 'binary_code_generation.c')
-rwxr-xr-x | binary_code_generation.c | 519 |
1 files changed, 0 insertions, 519 deletions
diff --git a/binary_code_generation.c b/binary_code_generation.c deleted file mode 100755 index 053baa6..0000000 --- a/binary_code_generation.c +++ /dev/null @@ -1,519 +0,0 @@ -#if 0 -SRC=${0##*/} -BIN=${SRC%.*} -gcc -o $BIN.tmp $SRC && ./$BIN.tmp $@ && rm $BIN.tmp -exit $? -#endif - -// ================================================================ -// -// Binary code generation compiler backend -// -// Qualities -// -// - Single source file -// - Simple and flexible API -// - No external dependencies -// - No configuration required -// - No dynamic memory management -// -// To-Do list -// -// - ELF + x86_64 executable -// - x86_64 object file -// - Linking libraries -// - Proper error handling -// - Effective entity allocation -// - Linked lists for large entities -// - Sea of Nodes -// - Optimization layers -// - Multithreading -// - Memory reallocation when necessary -// - COFF, PE, OMF, Mach-O -// - i386, RISC-V, ARM -// -// Done -// -// - ELF header -// -// ================================================================ -// -// Basic declarations -// -// ================================================================ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> - -#ifdef __unix__ -#include <sys/types.h> -#include <sys/stat.h> -#endif - -typedef signed char i8; -typedef signed short i16; -typedef signed int i32; -typedef signed long long i64; -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; -typedef float f32; -typedef double f64; -typedef signed char b8; -typedef int b32; -typedef char c8; - -// ================================================================ -// -// Intermediate representation -// -// ================================================================ - -// Declarations -// - -enum { - // For indices - UNDEFINED = -1, - - // Formats - // - - FORMAT_ELF = 1, - FORMAT_COFF, - FORMAT_PE, - FORMAT_OMF, - FORMAT_MATCH_O, - - // Architecture - // - - ARCH_RISC_V = 64, - ARCH_I386, - ARCH_X86_64, - ARCH_ARM, - - // Sea of Nodes flow type - // - - FLOW_DATA = 0, - FLOW_CONTROL, - - // Semantic node operations - // - - OP_I64 = 0, - OP_RET, - - // Entity types - // - - TYPE_NODE = 0, - TYPE_PROC, - TYPE_UNIT, - - // Limits - // - // NOTE - // - // All limits can be exceeded using the linked list of entities - // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`. - // - - MAX_LITERAL_SIZE = 400, - MAX_NAME_SIZE = 128, - MAX_PROC_COUNT = 80, - MAX_NODE_COUNT = 60, - MAX_ENTITY_COUNT = 16384, -}; - -// A semantic node is an operation with optional data -// and possible references to other nodes. -// - -typedef struct { - i16 op; - i8 flow; - union { - u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal - i64 lit_int; // integer literal - i64 ref_node[4]; // references to other nodes - }; -} node_t; - -// A procedure is a collection of semantic nodes -// and has a string name. -// - -typedef struct { - i64 name_size; - c8 name[MAX_NAME_SIZE]; - i64 node_count; - i64 nodes[MAX_NODE_COUNT]; -} proc_t; - -// A compilation unit is a collection of procedures. -// - -typedef struct { - i64 entry_point; - i64 proc_count; - i64 procs[MAX_PROC_COUNT]; -} unit_t; - -// An entity can be any of: -// - `node_t` -// - `proc_t` -// - `unit_t` -// -// Every entity can be referenced by it's unique index -// in the entity pool. -// -// If the entity's data doesn't fit in one entity, tail is -// an index that leads to the entity with the rest of the -// data, forming a linked list. -// - -typedef struct { - b8 is_enabled; - i16 type; - i64 tail; - union { - node_t node; - proc_t proc; - unit_t unit; - }; -} entity_t; - -// Pool, a collection of all entities. -// - -typedef struct { - i64 entity_count; - entity_t entities[MAX_ENTITY_COUNT]; -} entity_pool_t; - -// IR building procs -// - -i64 pool_entity_add(entity_pool_t *pool, entity_t data) { - assert(pool != NULL); - assert(pool->entity_count < MAX_ENTITY_COUNT); - - i64 id = pool->entity_count++; - data.is_enabled = 1, - pool->entities[id] = data; - - return id; -} - -void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type) { - assert(pool != NULL); - assert(pool->entities[entity].is_enabled); - assert(pool->entities[entity].type == type); - - pool->entities[entity].is_enabled = 1; -} - -i64 node_init(entity_pool_t *pool, node_t data) { - return pool_entity_add(pool, (entity_t) { - .type = TYPE_NODE, - .tail = UNDEFINED, - .node = data, - }); -} - -void node_destroy(entity_pool_t *pool, i64 node) { - pool_entity_remove(pool, node, TYPE_NODE); -} - -i64 node_op_i64(entity_pool_t *pool, i64 value) { - return node_init(pool, (node_t) { - .op = OP_I64, - .flow = FLOW_DATA, - .lit_int = value, - }); -} - -i64 node_op_ret(entity_pool_t *pool, i64 node_return_value) { - return node_init(pool, (node_t) { - .op = OP_RET, - .flow = FLOW_CONTROL, - .ref_node = node_return_value, - }); -} - -i64 proc_init(entity_pool_t *pool) { - return pool_entity_add(pool, (entity_t) { - .type = TYPE_PROC, - .tail = UNDEFINED, - }); -} - -void proc_destroy(entity_pool_t *pool, i64 proc) { - pool_entity_remove(pool, proc, TYPE_PROC); -} - -void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name) { - assert(pool != NULL); - assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); - - // TODO - // Implement large entities. - assert(name_size <= MAX_NAME_SIZE); - assert(name_size > 0); - - proc_t *p = &pool->entities[proc].proc; - p->name_size = name_size; - memcpy(p->name, name, name_size); -} - -void proc_node_add(entity_pool_t *pool, i64 proc, i64 node) { - assert(pool != NULL); - assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); - assert(pool->entities[node].is_enabled); - assert(pool->entities[node].type == TYPE_NODE); - - proc_t *p = &pool->entities[proc].proc; - - // TODO - // Implement large entities. - assert(p->node_count < MAX_NODE_COUNT); - - p->nodes[p->node_count++] = node; -} - -void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node) { - assert(pool != NULL); - assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); - assert(pool->entities[node].type == TYPE_NODE); - - pool->entities[proc].proc.nodes[node] = UNDEFINED; -} - -i64 unit_init(entity_pool_t *pool) { - return pool_entity_add(pool, (entity_t) { - .type = TYPE_UNIT, - .tail = UNDEFINED, - .unit = (unit_t) { - .entry_point = UNDEFINED, - } - }); -} - -void unit_destroy(entity_pool_t *pool, i64 unit) { - pool_entity_remove(pool, unit, TYPE_UNIT); -} - -void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc) { - assert(pool != NULL); - assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); - assert(pool->entities[proc].is_enabled); - assert(pool->entities[proc].type == TYPE_PROC); - - unit_t *u = &pool->entities[unit].unit; - - // TODO - // Implement large entities. - assert(u->proc_count < MAX_PROC_COUNT); - - u->procs[u->proc_count++] = proc; -} - -void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc) { - assert(pool != NULL); - assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); - assert(pool->entities[proc].type == TYPE_PROC); - - pool->entities[unit].unit.procs[proc] = UNDEFINED; -} - -void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc) { - assert(pool != NULL); - assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].type == TYPE_UNIT); - assert(pool->entities[entry_point_proc].type == TYPE_PROC); - - pool->entities[unit].unit.entry_point = entry_point_proc; -} - -// ================================================================ -// -// Code generation proc -// -// ================================================================ - -void unit_write(entity_pool_t *pool, i64 unit, i16 target, FILE *out) { - assert(pool != NULL); - assert(pool->entities[unit].is_enabled); - assert(pool->entities[unit].unit.entry_point != UNDEFINED); - assert(out != NULL); - assert(target == (FORMAT_ELF | ARCH_X86_64)); - - printf("Writing ELF x86_64 executable...\n"); - - u16 ehs = 64; - u16 shs = 0; - u16 phs = 56; - u64 align = 8; - - u8 code[16] = { - 0xb8, // mov rax - 0x3c, 0, 0, 0, // 60 // exit - 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0 - 0x0f, 0x05, // syscall - }; - - u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1)); - u64 code_size = sizeof code; - - assert((code_offset % align) == 0); - assert((code_size % align) == 0); - - u64 entry = 0x400000 + ehs + phs; - - // ELF header - // - - #define WRITE_V(...) fwrite(&(u8[]) {__VA_ARGS__}, 1, sizeof((u8[]) {__VA_ARGS__}), out) - #define WRITE_DUP(x, n) fwrite( (u8[n]) { 0 }, 1, n, out) - #define WRITE_2(x) fwrite(&(u16) { x }, 2, 1, out) - #define WRITE_4(x) fwrite(&(u32) { x }, 4, 1, out) - #define WRITE_8(x) fwrite(&(u64) { x }, 8, 1, out) - - WRITE_V( 0x7f, 'E', 'L', 'F' ); // magic - - WRITE_V( 2 ); // elf64 - WRITE_V( 1 ); // 2's complement, little endian - WRITE_V( 1 ); // current version - WRITE_V( 0 ); // ABI = SysV - WRITE_V( 0 ); // ABI version - - WRITE_DUP(0, 7); // padding - - WRITE_2( 2 ); // executable - WRITE_2( 62 ); // x86_64 - WRITE_4( 1 ); // current version - WRITE_8( entry ); // entry point address - WRITE_8( ehs ); // program header offset - WRITE_8( 0 ); // section header offset - WRITE_4( 0 ); // flags - WRITE_2( ehs ); // ELF header size - WRITE_2( phs ); // program header size - WRITE_2( 1 ); // program header count - WRITE_2( shs ); // section header size - WRITE_2( 0 ); // section header count - WRITE_2( 0 ); // string table section header index - - // Program header - // - - WRITE_4( 1 ); // type (PT_LOAD) - WRITE_4( 5 ); // flags (PF_X | PF_R) - WRITE_8( code_offset ); // offset - WRITE_8( entry ); // vaddr - WRITE_8( entry ); // paddr - WRITE_8( code_size ); // filesz - WRITE_8( code_size ); // memsz - WRITE_8( 8 ); // align - - #undef WRITE_V - #undef WRITE_DUP - #undef WRITE_32 - #undef WRITE_64 - - // Code - // - - for (i64 i = code_offset - ehs - phs; i > 0; --i) - fwrite(&(u8) { 0 }, 1, 1, out); - - fwrite(code, 1, code_size, out); -} - -// ================================================================ -// -// Helpers -// -// ================================================================ - -// Global state -// - -static entity_pool_t g_pool = { 0 }; - -// Handy procedures -// - -i64 n_i64(i64 value) { - return node_op_i64(&g_pool, value); -} - -i64 n_ret(i64 node_return_value) { - return node_op_ret(&g_pool, node_return_value); -} - -i64 p_new(c8 const *name) { - i64 p = proc_init(&g_pool); - proc_set_name(&g_pool, p, strlen(name), name); - return p; -} - -void p_add(i64 proc, i64 node) { - proc_node_add(&g_pool, proc, node); -} - -i64 u_new() { - return unit_init(&g_pool); -} - -void u_add(i64 unit, i64 proc) { - unit_proc_add(&g_pool, unit, proc); -} - -void u_entry_point(i64 unit, i64 proc) { - unit_set_entry_point(&g_pool, unit, proc); -} - -void u_elf_x86_64(i64 unit, c8 const *output_file_name) { - FILE *f = fopen(output_file_name, "wb"); - assert(f != NULL); - unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f); -#ifdef __unix__ - fchmod(fileno(f), 0775); -#endif - fclose(f); -} - -// ================================================================ -// -// Example -// -// ================================================================ - -int main(int argc, char **argv) { - printf("node - %d bytes\n", (i32) sizeof(node_t)); - printf("proc - %d bytes\n", (i32) sizeof(proc_t)); - printf("unit - %d bytes\n", (i32) sizeof(unit_t)); - - i64 main = p_new("main"); - i64 n0 = n_i64(42); - p_add(main, n0); - p_add(main, n_ret(n0)); - - i64 u = u_new(); - u_add(u, main); - u_entry_point(u, main); - u_elf_x86_64(u, "test_foo"); - - printf("\nBye!\n"); - return 0; -} |