From cc1cd3f8f48dc506086715bb207dfbf175c11575 Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Sun, 9 Jun 2024 21:28:30 +0200 Subject: Cleanup; docs --- bxgen.c | 601 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100755 bxgen.c (limited to 'bxgen.c') diff --git a/bxgen.c b/bxgen.c new file mode 100755 index 0000000..d672b2f --- /dev/null +++ b/bxgen.c @@ -0,0 +1,601 @@ +#if 0 +SRC=${0##*/} +BIN=${SRC%.*} +gcc -o $BIN.tmp $SRC && ./$BIN.tmp $@ && rm $BIN.tmp +exit $? +#endif + +// ================================================================ +// +// bxgen.c +// Binary executable code generation - compiler backend +// +// Qualities +// +// - Single source file (for now) +// - Simple and flexible API +// - No external dependencies +// - No configuration required +// - No dynamic memory management +// - Easy cross-compilation +// - Platform-independent host +// +// To-Do list +// +// - ELF + x86_64 executable +// - x86_64 object file +// - Linking libraries +// - Proper error handling +// - Proper prefixes for identifiers +// - Effective entity allocation +// - Linked lists for large entities +// - Sea of Nodes +// - Optimization layers +// - Multithreading +// - Memory reallocation when necessary +// - JIT +// - COFF, PE, OMF, Mach-O +// - i386, RISC-V, ARM +// - Built-in standard library +// +// Bugs +// +// - ... +// +// Done features +// +// - ELF header +// +// ================================================================ +// +// Compilation options +// +// ================================================================ + +//#define DISABLE_IMPLEMENTATION +//#define DISABLE_HELPERS +//#define DISABLE_EXAMPLE + +// ================================================================ +// +// Basic declarations +// +// ================================================================ + +#include + +typedef signed char i8; +typedef signed short i16; +typedef signed int i32; +typedef signed long long i64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef float f32; +typedef double f64; +typedef signed char b8; // 8-bit boolean +typedef int b32; // 32-bit boolean +typedef int s32; // 32-bit status code +typedef char c8; // 8-bit character + +// ================================================================ +// +// IR data declarations +// +// ================================================================ + +enum { + // For indices + UNDEFINED = -1, + + // Formats + // + + FORMAT_ELF = 1, + FORMAT_COFF, + FORMAT_PE, + FORMAT_OMF, + FORMAT_MATCH_O, + + // Architecture + // + + ARCH_RISC_V = 64, + ARCH_I386, + ARCH_X86_64, + ARCH_ARM, + + // Sea of Nodes flow type + // + + FLOW_DATA = 0, + FLOW_CONTROL, + + // Semantic node operations + // + + OP_I64 = 0, + OP_RET, + + // Entity types + // + + TYPE_NODE = 0, + TYPE_PROC, + TYPE_UNIT, + + // Limits + // + // NOTE + // + // All limits can be exceeded using the linked list of entities + // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`. + // + + MAX_LITERAL_SIZE = 400, + MAX_NAME_SIZE = 128, + MAX_PROC_COUNT = 80, + MAX_NODE_COUNT = 60, + MAX_ENTITY_COUNT = 16384, +}; + +// A semantic node is an operation with optional data +// and possible references to other nodes. +// + +typedef struct { + i16 op; + i8 flow; + union { + u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal + i64 lit_int; // integer literal + i64 ref_node[4]; // references to other nodes + }; +} node_t; + +// A procedure is a collection of semantic nodes +// and has a string name. +// + +typedef struct { + i64 name_size; + c8 name[MAX_NAME_SIZE]; + i64 node_count; + i64 nodes[MAX_NODE_COUNT]; +} proc_t; + +// A compilation unit is a collection of procedures. +// + +typedef struct { + i64 entry_point; + i64 proc_count; + i64 procs[MAX_PROC_COUNT]; +} unit_t; + +// An entity can be any of: +// - `node_t` +// - `proc_t` +// - `unit_t` +// +// Every entity can be referenced by it's unique index +// in the entity pool. +// +// If the entity's data doesn't fit in one entity, tail is +// an index that leads to the entity with the rest of the +// data, forming a linked list. +// + +typedef struct { + b8 is_enabled; + i16 type; + i64 tail; + union { + node_t node; + proc_t proc; + unit_t unit; + }; +} entity_t; + +// Pool, a collection of all entities. +// + +typedef struct { + i64 entity_count; + i64 entities_capacity; + entity_t *entities; +} entity_pool_t; + +// ================================================================ +// +// API declarations +// +// ================================================================ + +#ifdef __cplusplus +extern "C" { +#endif + +i64 pool_entity_add(entity_pool_t *pool, entity_t data); +void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type); + +i64 node_init(entity_pool_t *pool, node_t data); +void node_destroy(entity_pool_t *pool, i64 node); +i64 node_op_i64(entity_pool_t *pool, i64 value); +i64 node_op_ret(entity_pool_t *pool, i64 node_return_value); + +i64 proc_init(entity_pool_t *pool); +void proc_destroy(entity_pool_t *pool, i64 proc); +void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name); +void proc_node_add(entity_pool_t *pool, i64 proc, i64 node); +void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node); + +i64 unit_init(entity_pool_t *pool); +void unit_destroy(entity_pool_t *pool, i64 unit); +void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc); +void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc); +void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc); +void unit_write(entity_pool_t *pool, i64 unit, u16 target, FILE *out); + +#ifndef DISABLE_HELPERS +i64 n_i64(i64 value); +i64 n_ret(i64 node_return_value); +i64 p_new(c8 const *name); +void p_add(i64 proc, i64 node); +i64 u_new(); +void u_add(i64 unit, i64 proc); +void u_entry_point(i64 unit, i64 proc); +void u_elf_x86_64(i64 unit, c8 const *output_file_name); +#endif + +#ifdef __cplusplus +} +#endif + +// ================================================================ +// +// Implementation +// +// ================================================================ + +#ifndef DISABLE_IMPLEMENTATION + +#ifdef __cplusplus +#error Implementation code should be compiled with a C compiler! +#endif + +#include +#include +#include + +#ifdef __unix__ +#include +#include +#endif + +// IR building procs +// + +i64 pool_entity_add(entity_pool_t *pool, entity_t data) { + assert(pool != NULL); + assert(pool->entity_count < pool->entities_capacity); + + i64 id = pool->entity_count++; + data.is_enabled = 1, + pool->entities[id] = data; + + return id; +} + +void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type) { + assert(pool != NULL); + assert(pool->entities[entity].is_enabled); + assert(pool->entities[entity].type == type); + + pool->entities[entity].is_enabled = 1; +} + +i64 node_init(entity_pool_t *pool, node_t data) { + return pool_entity_add(pool, (entity_t) { + .type = TYPE_NODE, + .tail = UNDEFINED, + .node = data, + }); +} + +void node_destroy(entity_pool_t *pool, i64 node) { + pool_entity_remove(pool, node, TYPE_NODE); +} + +i64 node_op_i64(entity_pool_t *pool, i64 value) { + return node_init(pool, (node_t) { + .op = OP_I64, + .flow = FLOW_DATA, + .lit_int = value, + }); +} + +i64 node_op_ret(entity_pool_t *pool, i64 node_return_value) { + return node_init(pool, (node_t) { + .op = OP_RET, + .flow = FLOW_CONTROL, + .ref_node = node_return_value, + }); +} + +i64 proc_init(entity_pool_t *pool) { + return pool_entity_add(pool, (entity_t) { + .type = TYPE_PROC, + .tail = UNDEFINED, + }); +} + +void proc_destroy(entity_pool_t *pool, i64 proc) { + pool_entity_remove(pool, proc, TYPE_PROC); +} + +void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name) { + assert(pool != NULL); + assert(pool->entities[proc].is_enabled); + assert(pool->entities[proc].type == TYPE_PROC); + + // TODO + // Implement large entities. + assert(name_size <= MAX_NAME_SIZE); + assert(name_size > 0); + + proc_t *p = &pool->entities[proc].proc; + p->name_size = name_size; + memcpy(p->name, name, name_size); +} + +void proc_node_add(entity_pool_t *pool, i64 proc, i64 node) { + assert(pool != NULL); + assert(pool->entities[proc].is_enabled); + assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[node].is_enabled); + assert(pool->entities[node].type == TYPE_NODE); + + proc_t *p = &pool->entities[proc].proc; + + // TODO + // Implement large entities. + assert(p->node_count < MAX_NODE_COUNT); + + p->nodes[p->node_count++] = node; +} + +void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node) { + assert(pool != NULL); + assert(pool->entities[proc].is_enabled); + assert(pool->entities[proc].type == TYPE_PROC); + assert(pool->entities[node].type == TYPE_NODE); + + pool->entities[proc].proc.nodes[node] = UNDEFINED; +} + +i64 unit_init(entity_pool_t *pool) { + return pool_entity_add(pool, (entity_t) { + .type = TYPE_UNIT, + .tail = UNDEFINED, + .unit = (unit_t) { + .entry_point = UNDEFINED, + } + }); +} + +void unit_destroy(entity_pool_t *pool, i64 unit) { + pool_entity_remove(pool, unit, TYPE_UNIT); +} + +void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc) { + assert(pool != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == TYPE_UNIT); + assert(pool->entities[proc].is_enabled); + assert(pool->entities[proc].type == TYPE_PROC); + + unit_t *u = &pool->entities[unit].unit; + + // TODO + // Implement large entities. + assert(u->proc_count < MAX_PROC_COUNT); + + u->procs[u->proc_count++] = proc; +} + +void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc) { + assert(pool != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == TYPE_UNIT); + assert(pool->entities[proc].type == TYPE_PROC); + + pool->entities[unit].unit.procs[proc] = UNDEFINED; +} + +void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc) { + assert(pool != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].type == TYPE_UNIT); + assert(pool->entities[entry_point_proc].type == TYPE_PROC); + + pool->entities[unit].unit.entry_point = entry_point_proc; +} + +// Code generation proc +// + +void unit_write(entity_pool_t *pool, i64 unit, u16 target, FILE *out) { + assert(pool != NULL); + assert(pool->entities[unit].is_enabled); + assert(pool->entities[unit].unit.entry_point != UNDEFINED); + assert(out != NULL); + assert(target == (FORMAT_ELF | ARCH_X86_64)); + + printf("Writing ELF x86_64 executable...\n"); + + u16 ehs = 64; + u16 shs = 0; + u16 phs = 56; + u64 align = 8; + + u8 code[16] = { + 0xb8, // mov rax + 0x3c, 0, 0, 0, // 60 // exit + 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0 + 0x0f, 0x05, // syscall + }; + + u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1)); + u64 code_size = sizeof code; + + assert((code_offset % align) == 0); + assert((code_size % align) == 0); + + u64 entry = 0x400000 + ehs + phs; + + // ELF header + // + + #define WRITE_V(...) fwrite(&(u8[]) {__VA_ARGS__}, 1, sizeof((u8[]) {__VA_ARGS__}), out) + #define WRITE_DUP(x, n) fwrite( (u8[n]) { 0 }, 1, n, out) + #define WRITE_2(x) fwrite(&(u16) { x }, 2, 1, out) + #define WRITE_4(x) fwrite(&(u32) { x }, 4, 1, out) + #define WRITE_8(x) fwrite(&(u64) { x }, 8, 1, out) + + WRITE_V( 0x7f, 'E', 'L', 'F' ); // magic + + WRITE_V( 2 ); // elf64 + WRITE_V( 1 ); // 2's complement, little endian + WRITE_V( 1 ); // current version + WRITE_V( 0 ); // ABI = SysV + WRITE_V( 0 ); // ABI version + + WRITE_DUP(0, 7); // padding + + WRITE_2( 2 ); // executable + WRITE_2( 62 ); // x86_64 + WRITE_4( 1 ); // current version + WRITE_8( entry ); // entry point address + WRITE_8( ehs ); // program header offset + WRITE_8( 0 ); // section header offset + WRITE_4( 0 ); // flags + WRITE_2( ehs ); // ELF header size + WRITE_2( phs ); // program header size + WRITE_2( 1 ); // program header count + WRITE_2( shs ); // section header size + WRITE_2( 0 ); // section header count + WRITE_2( 0 ); // string table section header index + + // Program header + // + + WRITE_4( 1 ); // type (PT_LOAD) + WRITE_4( 5 ); // flags (PF_X | PF_R) + WRITE_8( code_offset ); // offset + WRITE_8( entry ); // vaddr + WRITE_8( entry ); // paddr + WRITE_8( code_size ); // filesz + WRITE_8( code_size ); // memsz + WRITE_8( 8 ); // align + + #undef WRITE_V + #undef WRITE_DUP + #undef WRITE_32 + #undef WRITE_64 + + // Code + // + + for (i64 i = code_offset - ehs - phs; i > 0; --i) + fwrite(&(u8) { 0 }, 1, 1, out); + + fwrite(code, 1, code_size, out); +} + +// Helpers implementation +// + +#ifndef DISABLE_HELPERS + +// Global state +// + +static entity_pool_t g_pool = { + .entities_capacity = MAX_ENTITY_COUNT, + .entities = (entity_t[MAX_ENTITY_COUNT]) { 0 }, +}; + +// Handy procedures +// + +i64 n_i64(i64 value) { + return node_op_i64(&g_pool, value); +} + +i64 n_ret(i64 node_return_value) { + return node_op_ret(&g_pool, node_return_value); +} + +i64 p_new(c8 const *name) { + i64 p = proc_init(&g_pool); + proc_set_name(&g_pool, p, strlen(name), name); + return p; +} + +void p_add(i64 proc, i64 node) { + proc_node_add(&g_pool, proc, node); +} + +i64 u_new() { + return unit_init(&g_pool); +} + +void u_add(i64 unit, i64 proc) { + unit_proc_add(&g_pool, unit, proc); +} + +void u_entry_point(i64 unit, i64 proc) { + unit_set_entry_point(&g_pool, unit, proc); +} + +void u_elf_x86_64(i64 unit, c8 const *output_file_name) { + FILE *f = fopen(output_file_name, "wb"); + assert(f != NULL); + unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f); +#ifdef __unix__ + fchmod(fileno(f), 0775); +#endif + fclose(f); +} + +#endif + +// Example +// + +#ifndef DISABLE_EXAMPLE + +int main(int argc, char **argv) { + printf("node - %d bytes\n", (i32) sizeof(node_t)); + printf("proc - %d bytes\n", (i32) sizeof(proc_t)); + printf("unit - %d bytes\n", (i32) sizeof(unit_t)); + + i64 main = p_new("main"); + i64 n0 = n_i64(42); + p_add(main, n0); + p_add(main, n_ret(n0)); + + i64 u = u_new(); + u_add(u, main); + u_entry_point(u, main); + u_elf_x86_64(u, "test_foo"); + + printf("\nBye!\n"); + return 0; +} + +#endif + +#endif -- cgit v1.2.3