#if 0 SRC=${0##*/} BIN=${SRC%.*} gcc \ -Wno-missing-field-initializers -Wno-missing-braces \ -Wall -Wextra -Werror -pedantic \ -O0 -fsanitize=undefined,address,leak -mshstk \ -o $BIN.tmp $SRC && \ ./$BIN.tmp $@ && rm $BIN.tmp exit $? #endif // ================================================================ // // bxgen.c // Binary executable code generation - compiler backend // // Qualities // // - Single source file (for now) // - Simple and flexible API // - No external dependencies // - No configuration required // - No dynamic memory management // - Easy cross-compilation // - Platform-independent host // // To-Do list // // - ELF + x86_64 executable // - x86_64 object file // - Linking libraries // - Proper error handling // - Proper prefixes for identifiers // - Effective entity allocation // - Linked lists for large entities // - Sea of Nodes // - Optimization layers // - Multithreading // - Memory reallocation when necessary // - JIT // - COFF, PE, OMF, Mach-O // - i386, RISC-V, ARM // - Built-in standard library // // Bugs // // - ... // // Done features // // - ELF header // // ================================================================ // // Compilation options // // ================================================================ //#define DISABLE_IMPLEMENTATION //#define DISABLE_HELPERS //#define DISABLE_EXAMPLE // ================================================================ // // Basic declarations // // ================================================================ #include typedef signed char i8; typedef signed short i16; typedef signed int i32; typedef signed long long i64; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef float f32; typedef double f64; typedef signed char b8; // 8-bit boolean typedef int b32; // 32-bit boolean typedef int s32; // 32-bit status code typedef char c8; // 8-bit character // ================================================================ // // IR data declarations // // ================================================================ enum { // For indices UNDEFINED = -1, // Formats // FORMAT_ELF = 1, FORMAT_COFF, FORMAT_PE, FORMAT_OMF, FORMAT_MATCH_O, // Architecture // ARCH_RISC_V = 64, ARCH_I386, ARCH_X86_64, ARCH_ARM, // Sea of Nodes flow type // FLOW_DATA = 0, FLOW_CONTROL, // Semantic node operations // OP_I64 = 0, OP_RET, // Entity types // TYPE_NODE = 0, TYPE_PROC, TYPE_UNIT, // Limits // // NOTE // // All limits can be exceeded using the linked list of entities // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`. // MAX_LITERAL_SIZE = 400, MAX_NAME_SIZE = 128, MAX_PROC_COUNT = 80, MAX_NODE_COUNT = 60, MAX_ENTITY_COUNT = 16384, }; // A semantic node is an operation with optional data // and possible references to other nodes. // typedef struct { i16 op; i8 flow; // `FLOW_DATA` or `FLOW_CONTROL` union { u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal i64 lit_int; // integer literal i64 ref_node[4]; // references to other nodes }; } node_t; // A procedure is a collection of semantic nodes // and has a string name. // typedef struct { i64 name_size; c8 name[MAX_NAME_SIZE]; i64 node_count; i64 nodes[MAX_NODE_COUNT]; } proc_t; // A compilation unit is a collection of procedures. // typedef struct { i64 entry_point; i64 proc_count; i64 procs[MAX_PROC_COUNT]; } unit_t; // An entity can be any of: // - `node_t` // - `proc_t` // - `unit_t` // // Every entity can be referenced by it's unique index // in the entity pool. // // If the entity's data doesn't fit in one entity, tail is // an index that leads to the entity with the rest of the // data, forming a linked list. // typedef struct { b8 is_enabled; i16 type; i64 tail; union { node_t node; proc_t proc; unit_t unit; }; } entity_t; // Pool, a collection of all entities. // // NOTE // We use one single large memory block for *everything*. // typedef struct { i64 entity_count; i64 capacity; entity_t *entities; } pool_t; // ================================================================ // // API declarations // // ================================================================ #ifdef __cplusplus extern "C" { #endif i64 pool_add(pool_t *pool, entity_t data); void pool_remove(pool_t *pool, i64 entity, i16 type); i64 node_init(pool_t *pool, node_t data); void node_destroy(pool_t *pool, i64 node); i64 node_op_i64(pool_t *pool, i64 value); i64 node_op_ret(pool_t *pool, i64 node_return_value); i64 proc_init(pool_t *pool); void proc_destroy(pool_t *pool, i64 proc); void proc_set_name(pool_t *pool, i64 proc, i64 name_size, c8 const *name); void proc_node_add(pool_t *pool, i64 proc, i64 node); void proc_node_remove(pool_t *pool, i64 proc, i64 node); i64 unit_init(pool_t *pool); void unit_destroy(pool_t *pool, i64 unit); void unit_proc_add(pool_t *pool, i64 unit, i64 proc); void unit_proc_remove(pool_t *pool, i64 unit, i64 proc); void unit_set_entry_point(pool_t *pool, i64 unit, i64 entry_point_proc); void unit_write(pool_t *pool, i64 unit, u16 target, FILE *out); #ifndef DISABLE_HELPERS i64 n_i64(i64 value); i64 n_ret(i64 node_return_value); i64 p_new(c8 const *name); void p_add(i64 proc, i64 node); i64 u_new(); void u_add(i64 unit, i64 proc); void u_entry_point(i64 unit, i64 proc); void u_elf_x86_64(i64 unit, c8 const *output_file_name); #endif #ifdef __cplusplus } #endif // ================================================================ // // Main features implementation // // ================================================================ #ifndef DISABLE_IMPLEMENTATION #ifdef __cplusplus #error Implementation code should be compiled with a C compiler! #endif #include // memcpy #include // assert // IR building procs // i64 pool_add(pool_t *pool, entity_t data) { assert(pool != NULL && pool->entities != NULL); assert(pool->entity_count < pool->capacity); i64 id = pool->entity_count++; data.is_enabled = 1, pool->entities[id] = data; return id; } void pool_remove(pool_t *pool, i64 entity, i16 type) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[entity].is_enabled); assert(pool->entities[entity].type == type); pool->entities[entity].is_enabled = 1; } i64 node_init(pool_t *pool, node_t data) { return pool_add(pool, (entity_t) { .type = TYPE_NODE, .tail = UNDEFINED, .node = data, }); } void node_destroy(pool_t *pool, i64 node) { pool_remove(pool, node, TYPE_NODE); } i64 node_op_i64(pool_t *pool, i64 value) { return node_init(pool, (node_t) { .op = OP_I64, .flow = FLOW_DATA, .lit_int = value, }); } i64 node_op_ret(pool_t *pool, i64 node_return_value) { return node_init(pool, (node_t) { .op = OP_RET, .flow = FLOW_CONTROL, .ref_node = { node_return_value, 0, }, }); } i64 proc_init(pool_t *pool) { return pool_add(pool, (entity_t) { .type = TYPE_PROC, .tail = UNDEFINED, }); } void proc_destroy(pool_t *pool, i64 proc) { pool_remove(pool, proc, TYPE_PROC); } void proc_set_name(pool_t *pool, i64 proc, i64 name_size, c8 const *name) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); // TODO // Implement large entities. assert(name_size <= MAX_NAME_SIZE); assert(name_size > 0); proc_t *p = &pool->entities[proc].proc; p->name_size = name_size; memcpy(p->name, name, name_size); } void proc_node_add(pool_t *pool, i64 proc, i64 node) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); assert(pool->entities[node].is_enabled); assert(pool->entities[node].type == TYPE_NODE); proc_t *p = &pool->entities[proc].proc; // TODO // Implement large entities. assert(p->node_count < MAX_NODE_COUNT); p->nodes[p->node_count++] = node; } void proc_node_remove(pool_t *pool, i64 proc, i64 node) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); assert(pool->entities[node].type == TYPE_NODE); pool->entities[proc].proc.nodes[node] = UNDEFINED; } i64 unit_init(pool_t *pool) { return pool_add(pool, (entity_t) { .type = TYPE_UNIT, .tail = UNDEFINED, .unit = (unit_t) { .entry_point = UNDEFINED, } }); } void unit_destroy(pool_t *pool, i64 unit) { pool_remove(pool, unit, TYPE_UNIT); } void unit_proc_add(pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[proc].is_enabled); assert(pool->entities[proc].type == TYPE_PROC); unit_t *u = &pool->entities[unit].unit; // TODO // Implement large entities. assert(u->proc_count < MAX_PROC_COUNT); u->procs[u->proc_count++] = proc; } void unit_proc_remove(pool_t *pool, i64 unit, i64 proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[proc].type == TYPE_PROC); pool->entities[unit].unit.procs[proc] = UNDEFINED; } void unit_set_entry_point(pool_t *pool, i64 unit, i64 entry_point_proc) { assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].type == TYPE_UNIT); assert(pool->entities[entry_point_proc].type == TYPE_PROC); pool->entities[unit].unit.entry_point = entry_point_proc; } // Code generation proc // void unit_write(pool_t *pool, i64 unit, u16 target, FILE *out) { // TODO // Use callback instead of `FILE` to not depend on `stdio.h` here. assert(pool != NULL && pool->entities != NULL); assert(pool->entities[unit].is_enabled); assert(pool->entities[unit].unit.entry_point != UNDEFINED); assert(out != NULL); assert(target == (FORMAT_ELF | ARCH_X86_64)); printf("Writing ELF x86_64 executable...\n"); u16 ehs = 64; u16 shs = 0; u16 phs = 56; u64 align = 8; u8 code[16] = { 0xb8, // mov rax 0x3c, 0, 0, 0, // 60 // exit 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0 0x0f, 0x05, // syscall }; u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1)); u64 code_size = sizeof code; assert((code_offset % align) == 0); assert((code_size % align) == 0); u64 entry = 0x400000 + ehs + phs; // ELF header // #define WRITE_V(...) fwrite(&(u8[]) {__VA_ARGS__}, 1, sizeof((u8[]) {__VA_ARGS__}), out) #define WRITE_DUP(x, n) fwrite( (u8[n]) { 0 }, 1, n, out) #define WRITE_2(x) fwrite(&(u16) { x }, 2, 1, out) #define WRITE_4(x) fwrite(&(u32) { x }, 4, 1, out) #define WRITE_8(x) fwrite(&(u64) { x }, 8, 1, out) WRITE_V( 0x7f, 'E', 'L', 'F' ); // magic WRITE_V( 2 ); // elf64 WRITE_V( 1 ); // 2's complement, little endian WRITE_V( 1 ); // current version WRITE_V( 0 ); // ABI = SysV WRITE_V( 0 ); // ABI version WRITE_DUP(0, 7); // padding WRITE_2( 2 ); // executable WRITE_2( 62 ); // x86_64 WRITE_4( 1 ); // current version WRITE_8( entry ); // entry point address WRITE_8( ehs ); // program header offset WRITE_8( 0 ); // section header offset WRITE_4( 0 ); // flags WRITE_2( ehs ); // ELF header size WRITE_2( phs ); // program header size WRITE_2( 1 ); // program header count WRITE_2( shs ); // section header size WRITE_2( 0 ); // section header count WRITE_2( 0 ); // string table section header index // Program header // WRITE_4( 1 ); // type (PT_LOAD) WRITE_4( 5 ); // flags (PF_X | PF_R) WRITE_8( code_offset ); // offset WRITE_8( entry ); // vaddr WRITE_8( entry ); // paddr WRITE_8( code_size ); // filesz WRITE_8( code_size ); // memsz WRITE_8( 8 ); // align #undef WRITE_V #undef WRITE_DUP #undef WRITE_32 #undef WRITE_64 // Code // for (i64 i = code_offset - ehs - phs; i > 0; --i) fwrite(&(u8) { 0 }, 1, 1, out); fwrite(code, 1, code_size, out); } // ================================================================ // // Helpers implementation // // ================================================================ #ifndef DISABLE_HELPERS #ifdef __unix__ #include #include #endif // Global state // static pool_t g_pool = { // Statically allocate a large memory block. // // TODO // Reallocate the memory block when necessary. // .capacity = MAX_ENTITY_COUNT, .entities = (entity_t[MAX_ENTITY_COUNT]) { 0 }, }; // Handy procedures // i64 n_i64(i64 value) { return node_op_i64(&g_pool, value); } i64 n_ret(i64 node_return_value) { return node_op_ret(&g_pool, node_return_value); } i64 p_new(c8 const *name) { i64 p = proc_init(&g_pool); proc_set_name(&g_pool, p, strlen(name), name); return p; } void p_add(i64 proc, i64 node) { proc_node_add(&g_pool, proc, node); } i64 u_new() { return unit_init(&g_pool); } void u_add(i64 unit, i64 proc) { unit_proc_add(&g_pool, unit, proc); } void u_entry_point(i64 unit, i64 proc) { unit_set_entry_point(&g_pool, unit, proc); } void u_elf_x86_64(i64 unit, c8 const *output_file_name) { FILE *f = fopen(output_file_name, "wb"); assert(f != NULL); unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f); #ifdef __unix__ fchmod(fileno(f), 0775); #endif fclose(f); } #endif // ================================================================ // // Example // // ================================================================ #ifndef DISABLE_EXAMPLE int main(int argc, char **argv) { (void) argc; (void) argv; printf("node - %d bytes\n", (i32) sizeof(node_t)); printf("proc - %d bytes\n", (i32) sizeof(proc_t)); printf("unit - %d bytes\n", (i32) sizeof(unit_t)); printf("entity - %d bytes\n", (i32) sizeof(entity_t)); i64 main = p_new("main"); i64 n0 = n_i64(42); p_add(main, n0); p_add(main, n_ret(n0)); i64 u = u_new(); u_add(u, main); u_entry_point(u, main); u_elf_x86_64(u, "test_foo"); printf("\nBye!\n"); return 0; } #endif #endif