summaryrefslogtreecommitdiff
path: root/bxgen.c
diff options
context:
space:
mode:
Diffstat (limited to 'bxgen.c')
-rwxr-xr-xbxgen.c601
1 files changed, 601 insertions, 0 deletions
diff --git a/bxgen.c b/bxgen.c
new file mode 100755
index 0000000..d672b2f
--- /dev/null
+++ b/bxgen.c
@@ -0,0 +1,601 @@
+#if 0
+SRC=${0##*/}
+BIN=${SRC%.*}
+gcc -o $BIN.tmp $SRC && ./$BIN.tmp $@ && rm $BIN.tmp
+exit $?
+#endif
+
+// ================================================================
+//
+// bxgen.c
+// Binary executable code generation - compiler backend
+//
+// Qualities
+//
+// - Single source file (for now)
+// - Simple and flexible API
+// - No external dependencies
+// - No configuration required
+// - No dynamic memory management
+// - Easy cross-compilation
+// - Platform-independent host
+//
+// To-Do list
+//
+// - ELF + x86_64 executable
+// - x86_64 object file
+// - Linking libraries
+// - Proper error handling
+// - Proper prefixes for identifiers
+// - Effective entity allocation
+// - Linked lists for large entities
+// - Sea of Nodes
+// - Optimization layers
+// - Multithreading
+// - Memory reallocation when necessary
+// - JIT
+// - COFF, PE, OMF, Mach-O
+// - i386, RISC-V, ARM
+// - Built-in standard library
+//
+// Bugs
+//
+// - ...
+//
+// Done features
+//
+// - ELF header
+//
+// ================================================================
+//
+// Compilation options
+//
+// ================================================================
+
+//#define DISABLE_IMPLEMENTATION
+//#define DISABLE_HELPERS
+//#define DISABLE_EXAMPLE
+
+// ================================================================
+//
+// Basic declarations
+//
+// ================================================================
+
+#include <stdio.h>
+
+typedef signed char i8;
+typedef signed short i16;
+typedef signed int i32;
+typedef signed long long i64;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef float f32;
+typedef double f64;
+typedef signed char b8; // 8-bit boolean
+typedef int b32; // 32-bit boolean
+typedef int s32; // 32-bit status code
+typedef char c8; // 8-bit character
+
+// ================================================================
+//
+// IR data declarations
+//
+// ================================================================
+
+enum {
+ // For indices
+ UNDEFINED = -1,
+
+ // Formats
+ //
+
+ FORMAT_ELF = 1,
+ FORMAT_COFF,
+ FORMAT_PE,
+ FORMAT_OMF,
+ FORMAT_MATCH_O,
+
+ // Architecture
+ //
+
+ ARCH_RISC_V = 64,
+ ARCH_I386,
+ ARCH_X86_64,
+ ARCH_ARM,
+
+ // Sea of Nodes flow type
+ //
+
+ FLOW_DATA = 0,
+ FLOW_CONTROL,
+
+ // Semantic node operations
+ //
+
+ OP_I64 = 0,
+ OP_RET,
+
+ // Entity types
+ //
+
+ TYPE_NODE = 0,
+ TYPE_PROC,
+ TYPE_UNIT,
+
+ // Limits
+ //
+ // NOTE
+ //
+ // All limits can be exceeded using the linked list of entities
+ // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`.
+ //
+
+ MAX_LITERAL_SIZE = 400,
+ MAX_NAME_SIZE = 128,
+ MAX_PROC_COUNT = 80,
+ MAX_NODE_COUNT = 60,
+ MAX_ENTITY_COUNT = 16384,
+};
+
+// A semantic node is an operation with optional data
+// and possible references to other nodes.
+//
+
+typedef struct {
+ i16 op;
+ i8 flow;
+ union {
+ u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal
+ i64 lit_int; // integer literal
+ i64 ref_node[4]; // references to other nodes
+ };
+} node_t;
+
+// A procedure is a collection of semantic nodes
+// and has a string name.
+//
+
+typedef struct {
+ i64 name_size;
+ c8 name[MAX_NAME_SIZE];
+ i64 node_count;
+ i64 nodes[MAX_NODE_COUNT];
+} proc_t;
+
+// A compilation unit is a collection of procedures.
+//
+
+typedef struct {
+ i64 entry_point;
+ i64 proc_count;
+ i64 procs[MAX_PROC_COUNT];
+} unit_t;
+
+// An entity can be any of:
+// - `node_t`
+// - `proc_t`
+// - `unit_t`
+//
+// Every entity can be referenced by it's unique index
+// in the entity pool.
+//
+// If the entity's data doesn't fit in one entity, tail is
+// an index that leads to the entity with the rest of the
+// data, forming a linked list.
+//
+
+typedef struct {
+ b8 is_enabled;
+ i16 type;
+ i64 tail;
+ union {
+ node_t node;
+ proc_t proc;
+ unit_t unit;
+ };
+} entity_t;
+
+// Pool, a collection of all entities.
+//
+
+typedef struct {
+ i64 entity_count;
+ i64 entities_capacity;
+ entity_t *entities;
+} entity_pool_t;
+
+// ================================================================
+//
+// API declarations
+//
+// ================================================================
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+i64 pool_entity_add(entity_pool_t *pool, entity_t data);
+void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type);
+
+i64 node_init(entity_pool_t *pool, node_t data);
+void node_destroy(entity_pool_t *pool, i64 node);
+i64 node_op_i64(entity_pool_t *pool, i64 value);
+i64 node_op_ret(entity_pool_t *pool, i64 node_return_value);
+
+i64 proc_init(entity_pool_t *pool);
+void proc_destroy(entity_pool_t *pool, i64 proc);
+void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name);
+void proc_node_add(entity_pool_t *pool, i64 proc, i64 node);
+void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node);
+
+i64 unit_init(entity_pool_t *pool);
+void unit_destroy(entity_pool_t *pool, i64 unit);
+void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc);
+void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc);
+void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc);
+void unit_write(entity_pool_t *pool, i64 unit, u16 target, FILE *out);
+
+#ifndef DISABLE_HELPERS
+i64 n_i64(i64 value);
+i64 n_ret(i64 node_return_value);
+i64 p_new(c8 const *name);
+void p_add(i64 proc, i64 node);
+i64 u_new();
+void u_add(i64 unit, i64 proc);
+void u_entry_point(i64 unit, i64 proc);
+void u_elf_x86_64(i64 unit, c8 const *output_file_name);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+// ================================================================
+//
+// Implementation
+//
+// ================================================================
+
+#ifndef DISABLE_IMPLEMENTATION
+
+#ifdef __cplusplus
+#error Implementation code should be compiled with a C compiler!
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#ifdef __unix__
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+
+// IR building procs
+//
+
+i64 pool_entity_add(entity_pool_t *pool, entity_t data) {
+ assert(pool != NULL);
+ assert(pool->entity_count < pool->entities_capacity);
+
+ i64 id = pool->entity_count++;
+ data.is_enabled = 1,
+ pool->entities[id] = data;
+
+ return id;
+}
+
+void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type) {
+ assert(pool != NULL);
+ assert(pool->entities[entity].is_enabled);
+ assert(pool->entities[entity].type == type);
+
+ pool->entities[entity].is_enabled = 1;
+}
+
+i64 node_init(entity_pool_t *pool, node_t data) {
+ return pool_entity_add(pool, (entity_t) {
+ .type = TYPE_NODE,
+ .tail = UNDEFINED,
+ .node = data,
+ });
+}
+
+void node_destroy(entity_pool_t *pool, i64 node) {
+ pool_entity_remove(pool, node, TYPE_NODE);
+}
+
+i64 node_op_i64(entity_pool_t *pool, i64 value) {
+ return node_init(pool, (node_t) {
+ .op = OP_I64,
+ .flow = FLOW_DATA,
+ .lit_int = value,
+ });
+}
+
+i64 node_op_ret(entity_pool_t *pool, i64 node_return_value) {
+ return node_init(pool, (node_t) {
+ .op = OP_RET,
+ .flow = FLOW_CONTROL,
+ .ref_node = node_return_value,
+ });
+}
+
+i64 proc_init(entity_pool_t *pool) {
+ return pool_entity_add(pool, (entity_t) {
+ .type = TYPE_PROC,
+ .tail = UNDEFINED,
+ });
+}
+
+void proc_destroy(entity_pool_t *pool, i64 proc) {
+ pool_entity_remove(pool, proc, TYPE_PROC);
+}
+
+void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name) {
+ assert(pool != NULL);
+ assert(pool->entities[proc].is_enabled);
+ assert(pool->entities[proc].type == TYPE_PROC);
+
+ // TODO
+ // Implement large entities.
+ assert(name_size <= MAX_NAME_SIZE);
+ assert(name_size > 0);
+
+ proc_t *p = &pool->entities[proc].proc;
+ p->name_size = name_size;
+ memcpy(p->name, name, name_size);
+}
+
+void proc_node_add(entity_pool_t *pool, i64 proc, i64 node) {
+ assert(pool != NULL);
+ assert(pool->entities[proc].is_enabled);
+ assert(pool->entities[proc].type == TYPE_PROC);
+ assert(pool->entities[node].is_enabled);
+ assert(pool->entities[node].type == TYPE_NODE);
+
+ proc_t *p = &pool->entities[proc].proc;
+
+ // TODO
+ // Implement large entities.
+ assert(p->node_count < MAX_NODE_COUNT);
+
+ p->nodes[p->node_count++] = node;
+}
+
+void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node) {
+ assert(pool != NULL);
+ assert(pool->entities[proc].is_enabled);
+ assert(pool->entities[proc].type == TYPE_PROC);
+ assert(pool->entities[node].type == TYPE_NODE);
+
+ pool->entities[proc].proc.nodes[node] = UNDEFINED;
+}
+
+i64 unit_init(entity_pool_t *pool) {
+ return pool_entity_add(pool, (entity_t) {
+ .type = TYPE_UNIT,
+ .tail = UNDEFINED,
+ .unit = (unit_t) {
+ .entry_point = UNDEFINED,
+ }
+ });
+}
+
+void unit_destroy(entity_pool_t *pool, i64 unit) {
+ pool_entity_remove(pool, unit, TYPE_UNIT);
+}
+
+void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc) {
+ assert(pool != NULL);
+ assert(pool->entities[unit].is_enabled);
+ assert(pool->entities[unit].type == TYPE_UNIT);
+ assert(pool->entities[proc].is_enabled);
+ assert(pool->entities[proc].type == TYPE_PROC);
+
+ unit_t *u = &pool->entities[unit].unit;
+
+ // TODO
+ // Implement large entities.
+ assert(u->proc_count < MAX_PROC_COUNT);
+
+ u->procs[u->proc_count++] = proc;
+}
+
+void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc) {
+ assert(pool != NULL);
+ assert(pool->entities[unit].is_enabled);
+ assert(pool->entities[unit].type == TYPE_UNIT);
+ assert(pool->entities[proc].type == TYPE_PROC);
+
+ pool->entities[unit].unit.procs[proc] = UNDEFINED;
+}
+
+void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc) {
+ assert(pool != NULL);
+ assert(pool->entities[unit].is_enabled);
+ assert(pool->entities[unit].type == TYPE_UNIT);
+ assert(pool->entities[entry_point_proc].type == TYPE_PROC);
+
+ pool->entities[unit].unit.entry_point = entry_point_proc;
+}
+
+// Code generation proc
+//
+
+void unit_write(entity_pool_t *pool, i64 unit, u16 target, FILE *out) {
+ assert(pool != NULL);
+ assert(pool->entities[unit].is_enabled);
+ assert(pool->entities[unit].unit.entry_point != UNDEFINED);
+ assert(out != NULL);
+ assert(target == (FORMAT_ELF | ARCH_X86_64));
+
+ printf("Writing ELF x86_64 executable...\n");
+
+ u16 ehs = 64;
+ u16 shs = 0;
+ u16 phs = 56;
+ u64 align = 8;
+
+ u8 code[16] = {
+ 0xb8, // mov rax
+ 0x3c, 0, 0, 0, // 60 // exit
+ 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0
+ 0x0f, 0x05, // syscall
+ };
+
+ u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1));
+ u64 code_size = sizeof code;
+
+ assert((code_offset % align) == 0);
+ assert((code_size % align) == 0);
+
+ u64 entry = 0x400000 + ehs + phs;
+
+ // ELF header
+ //
+
+ #define WRITE_V(...) fwrite(&(u8[]) {__VA_ARGS__}, 1, sizeof((u8[]) {__VA_ARGS__}), out)
+ #define WRITE_DUP(x, n) fwrite( (u8[n]) { 0 }, 1, n, out)
+ #define WRITE_2(x) fwrite(&(u16) { x }, 2, 1, out)
+ #define WRITE_4(x) fwrite(&(u32) { x }, 4, 1, out)
+ #define WRITE_8(x) fwrite(&(u64) { x }, 8, 1, out)
+
+ WRITE_V( 0x7f, 'E', 'L', 'F' ); // magic
+
+ WRITE_V( 2 ); // elf64
+ WRITE_V( 1 ); // 2's complement, little endian
+ WRITE_V( 1 ); // current version
+ WRITE_V( 0 ); // ABI = SysV
+ WRITE_V( 0 ); // ABI version
+
+ WRITE_DUP(0, 7); // padding
+
+ WRITE_2( 2 ); // executable
+ WRITE_2( 62 ); // x86_64
+ WRITE_4( 1 ); // current version
+ WRITE_8( entry ); // entry point address
+ WRITE_8( ehs ); // program header offset
+ WRITE_8( 0 ); // section header offset
+ WRITE_4( 0 ); // flags
+ WRITE_2( ehs ); // ELF header size
+ WRITE_2( phs ); // program header size
+ WRITE_2( 1 ); // program header count
+ WRITE_2( shs ); // section header size
+ WRITE_2( 0 ); // section header count
+ WRITE_2( 0 ); // string table section header index
+
+ // Program header
+ //
+
+ WRITE_4( 1 ); // type (PT_LOAD)
+ WRITE_4( 5 ); // flags (PF_X | PF_R)
+ WRITE_8( code_offset ); // offset
+ WRITE_8( entry ); // vaddr
+ WRITE_8( entry ); // paddr
+ WRITE_8( code_size ); // filesz
+ WRITE_8( code_size ); // memsz
+ WRITE_8( 8 ); // align
+
+ #undef WRITE_V
+ #undef WRITE_DUP
+ #undef WRITE_32
+ #undef WRITE_64
+
+ // Code
+ //
+
+ for (i64 i = code_offset - ehs - phs; i > 0; --i)
+ fwrite(&(u8) { 0 }, 1, 1, out);
+
+ fwrite(code, 1, code_size, out);
+}
+
+// Helpers implementation
+//
+
+#ifndef DISABLE_HELPERS
+
+// Global state
+//
+
+static entity_pool_t g_pool = {
+ .entities_capacity = MAX_ENTITY_COUNT,
+ .entities = (entity_t[MAX_ENTITY_COUNT]) { 0 },
+};
+
+// Handy procedures
+//
+
+i64 n_i64(i64 value) {
+ return node_op_i64(&g_pool, value);
+}
+
+i64 n_ret(i64 node_return_value) {
+ return node_op_ret(&g_pool, node_return_value);
+}
+
+i64 p_new(c8 const *name) {
+ i64 p = proc_init(&g_pool);
+ proc_set_name(&g_pool, p, strlen(name), name);
+ return p;
+}
+
+void p_add(i64 proc, i64 node) {
+ proc_node_add(&g_pool, proc, node);
+}
+
+i64 u_new() {
+ return unit_init(&g_pool);
+}
+
+void u_add(i64 unit, i64 proc) {
+ unit_proc_add(&g_pool, unit, proc);
+}
+
+void u_entry_point(i64 unit, i64 proc) {
+ unit_set_entry_point(&g_pool, unit, proc);
+}
+
+void u_elf_x86_64(i64 unit, c8 const *output_file_name) {
+ FILE *f = fopen(output_file_name, "wb");
+ assert(f != NULL);
+ unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f);
+#ifdef __unix__
+ fchmod(fileno(f), 0775);
+#endif
+ fclose(f);
+}
+
+#endif
+
+// Example
+//
+
+#ifndef DISABLE_EXAMPLE
+
+int main(int argc, char **argv) {
+ printf("node - %d bytes\n", (i32) sizeof(node_t));
+ printf("proc - %d bytes\n", (i32) sizeof(proc_t));
+ printf("unit - %d bytes\n", (i32) sizeof(unit_t));
+
+ i64 main = p_new("main");
+ i64 n0 = n_i64(42);
+ p_add(main, n0);
+ p_add(main, n_ret(n0));
+
+ i64 u = u_new();
+ u_add(u, main);
+ u_entry_point(u, main);
+ u_elf_x86_64(u, "test_foo");
+
+ printf("\nBye!\n");
+ return 0;
+}
+
+#endif
+
+#endif