summaryrefslogtreecommitdiff
path: root/binary_code_generation.c
diff options
context:
space:
mode:
authorMitya Selivanov <automainint@guattari.tech>2024-06-09 21:28:30 +0200
committerMitya Selivanov <automainint@guattari.tech>2024-06-09 21:28:30 +0200
commitcc1cd3f8f48dc506086715bb207dfbf175c11575 (patch)
tree8625d6ab13d4c5374b880b935bc1c1e15aca24ef /binary_code_generation.c
parent31f95ee266e507a3155a82899531851c7202f0a3 (diff)
downloadbxgen-cc1cd3f8f48dc506086715bb207dfbf175c11575.zip
Cleanup; docs
Diffstat (limited to 'binary_code_generation.c')
-rwxr-xr-xbinary_code_generation.c519
1 files changed, 0 insertions, 519 deletions
diff --git a/binary_code_generation.c b/binary_code_generation.c
deleted file mode 100755
index 053baa6..0000000
--- a/binary_code_generation.c
+++ /dev/null
@@ -1,519 +0,0 @@
-#if 0
-SRC=${0##*/}
-BIN=${SRC%.*}
-gcc -o $BIN.tmp $SRC && ./$BIN.tmp $@ && rm $BIN.tmp
-exit $?
-#endif
-
-// ================================================================
-//
-// Binary code generation compiler backend
-//
-// Qualities
-//
-// - Single source file
-// - Simple and flexible API
-// - No external dependencies
-// - No configuration required
-// - No dynamic memory management
-//
-// To-Do list
-//
-// - ELF + x86_64 executable
-// - x86_64 object file
-// - Linking libraries
-// - Proper error handling
-// - Effective entity allocation
-// - Linked lists for large entities
-// - Sea of Nodes
-// - Optimization layers
-// - Multithreading
-// - Memory reallocation when necessary
-// - COFF, PE, OMF, Mach-O
-// - i386, RISC-V, ARM
-//
-// Done
-//
-// - ELF header
-//
-// ================================================================
-//
-// Basic declarations
-//
-// ================================================================
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#ifdef __unix__
-#include <sys/types.h>
-#include <sys/stat.h>
-#endif
-
-typedef signed char i8;
-typedef signed short i16;
-typedef signed int i32;
-typedef signed long long i64;
-typedef unsigned char u8;
-typedef unsigned short u16;
-typedef unsigned int u32;
-typedef unsigned long long u64;
-typedef float f32;
-typedef double f64;
-typedef signed char b8;
-typedef int b32;
-typedef char c8;
-
-// ================================================================
-//
-// Intermediate representation
-//
-// ================================================================
-
-// Declarations
-//
-
-enum {
- // For indices
- UNDEFINED = -1,
-
- // Formats
- //
-
- FORMAT_ELF = 1,
- FORMAT_COFF,
- FORMAT_PE,
- FORMAT_OMF,
- FORMAT_MATCH_O,
-
- // Architecture
- //
-
- ARCH_RISC_V = 64,
- ARCH_I386,
- ARCH_X86_64,
- ARCH_ARM,
-
- // Sea of Nodes flow type
- //
-
- FLOW_DATA = 0,
- FLOW_CONTROL,
-
- // Semantic node operations
- //
-
- OP_I64 = 0,
- OP_RET,
-
- // Entity types
- //
-
- TYPE_NODE = 0,
- TYPE_PROC,
- TYPE_UNIT,
-
- // Limits
- //
- // NOTE
- //
- // All limits can be exceeded using the linked list of entities
- // (see `entity_t::tail`), except for `MAX_ENTITY_COUNT`.
- //
-
- MAX_LITERAL_SIZE = 400,
- MAX_NAME_SIZE = 128,
- MAX_PROC_COUNT = 80,
- MAX_NODE_COUNT = 60,
- MAX_ENTITY_COUNT = 16384,
-};
-
-// A semantic node is an operation with optional data
-// and possible references to other nodes.
-//
-
-typedef struct {
- i16 op;
- i8 flow;
- union {
- u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal
- i64 lit_int; // integer literal
- i64 ref_node[4]; // references to other nodes
- };
-} node_t;
-
-// A procedure is a collection of semantic nodes
-// and has a string name.
-//
-
-typedef struct {
- i64 name_size;
- c8 name[MAX_NAME_SIZE];
- i64 node_count;
- i64 nodes[MAX_NODE_COUNT];
-} proc_t;
-
-// A compilation unit is a collection of procedures.
-//
-
-typedef struct {
- i64 entry_point;
- i64 proc_count;
- i64 procs[MAX_PROC_COUNT];
-} unit_t;
-
-// An entity can be any of:
-// - `node_t`
-// - `proc_t`
-// - `unit_t`
-//
-// Every entity can be referenced by it's unique index
-// in the entity pool.
-//
-// If the entity's data doesn't fit in one entity, tail is
-// an index that leads to the entity with the rest of the
-// data, forming a linked list.
-//
-
-typedef struct {
- b8 is_enabled;
- i16 type;
- i64 tail;
- union {
- node_t node;
- proc_t proc;
- unit_t unit;
- };
-} entity_t;
-
-// Pool, a collection of all entities.
-//
-
-typedef struct {
- i64 entity_count;
- entity_t entities[MAX_ENTITY_COUNT];
-} entity_pool_t;
-
-// IR building procs
-//
-
-i64 pool_entity_add(entity_pool_t *pool, entity_t data) {
- assert(pool != NULL);
- assert(pool->entity_count < MAX_ENTITY_COUNT);
-
- i64 id = pool->entity_count++;
- data.is_enabled = 1,
- pool->entities[id] = data;
-
- return id;
-}
-
-void pool_entity_remove(entity_pool_t *pool, i64 entity, i16 type) {
- assert(pool != NULL);
- assert(pool->entities[entity].is_enabled);
- assert(pool->entities[entity].type == type);
-
- pool->entities[entity].is_enabled = 1;
-}
-
-i64 node_init(entity_pool_t *pool, node_t data) {
- return pool_entity_add(pool, (entity_t) {
- .type = TYPE_NODE,
- .tail = UNDEFINED,
- .node = data,
- });
-}
-
-void node_destroy(entity_pool_t *pool, i64 node) {
- pool_entity_remove(pool, node, TYPE_NODE);
-}
-
-i64 node_op_i64(entity_pool_t *pool, i64 value) {
- return node_init(pool, (node_t) {
- .op = OP_I64,
- .flow = FLOW_DATA,
- .lit_int = value,
- });
-}
-
-i64 node_op_ret(entity_pool_t *pool, i64 node_return_value) {
- return node_init(pool, (node_t) {
- .op = OP_RET,
- .flow = FLOW_CONTROL,
- .ref_node = node_return_value,
- });
-}
-
-i64 proc_init(entity_pool_t *pool) {
- return pool_entity_add(pool, (entity_t) {
- .type = TYPE_PROC,
- .tail = UNDEFINED,
- });
-}
-
-void proc_destroy(entity_pool_t *pool, i64 proc) {
- pool_entity_remove(pool, proc, TYPE_PROC);
-}
-
-void proc_set_name(entity_pool_t *pool, i64 proc, i64 name_size, c8 const *name) {
- assert(pool != NULL);
- assert(pool->entities[proc].is_enabled);
- assert(pool->entities[proc].type == TYPE_PROC);
-
- // TODO
- // Implement large entities.
- assert(name_size <= MAX_NAME_SIZE);
- assert(name_size > 0);
-
- proc_t *p = &pool->entities[proc].proc;
- p->name_size = name_size;
- memcpy(p->name, name, name_size);
-}
-
-void proc_node_add(entity_pool_t *pool, i64 proc, i64 node) {
- assert(pool != NULL);
- assert(pool->entities[proc].is_enabled);
- assert(pool->entities[proc].type == TYPE_PROC);
- assert(pool->entities[node].is_enabled);
- assert(pool->entities[node].type == TYPE_NODE);
-
- proc_t *p = &pool->entities[proc].proc;
-
- // TODO
- // Implement large entities.
- assert(p->node_count < MAX_NODE_COUNT);
-
- p->nodes[p->node_count++] = node;
-}
-
-void proc_node_remove(entity_pool_t *pool, i64 proc, i64 node) {
- assert(pool != NULL);
- assert(pool->entities[proc].is_enabled);
- assert(pool->entities[proc].type == TYPE_PROC);
- assert(pool->entities[node].type == TYPE_NODE);
-
- pool->entities[proc].proc.nodes[node] = UNDEFINED;
-}
-
-i64 unit_init(entity_pool_t *pool) {
- return pool_entity_add(pool, (entity_t) {
- .type = TYPE_UNIT,
- .tail = UNDEFINED,
- .unit = (unit_t) {
- .entry_point = UNDEFINED,
- }
- });
-}
-
-void unit_destroy(entity_pool_t *pool, i64 unit) {
- pool_entity_remove(pool, unit, TYPE_UNIT);
-}
-
-void unit_proc_add(entity_pool_t *pool, i64 unit, i64 proc) {
- assert(pool != NULL);
- assert(pool->entities[unit].is_enabled);
- assert(pool->entities[unit].type == TYPE_UNIT);
- assert(pool->entities[proc].is_enabled);
- assert(pool->entities[proc].type == TYPE_PROC);
-
- unit_t *u = &pool->entities[unit].unit;
-
- // TODO
- // Implement large entities.
- assert(u->proc_count < MAX_PROC_COUNT);
-
- u->procs[u->proc_count++] = proc;
-}
-
-void unit_proc_remove(entity_pool_t *pool, i64 unit, i64 proc) {
- assert(pool != NULL);
- assert(pool->entities[unit].is_enabled);
- assert(pool->entities[unit].type == TYPE_UNIT);
- assert(pool->entities[proc].type == TYPE_PROC);
-
- pool->entities[unit].unit.procs[proc] = UNDEFINED;
-}
-
-void unit_set_entry_point(entity_pool_t *pool, i64 unit, i64 entry_point_proc) {
- assert(pool != NULL);
- assert(pool->entities[unit].is_enabled);
- assert(pool->entities[unit].type == TYPE_UNIT);
- assert(pool->entities[entry_point_proc].type == TYPE_PROC);
-
- pool->entities[unit].unit.entry_point = entry_point_proc;
-}
-
-// ================================================================
-//
-// Code generation proc
-//
-// ================================================================
-
-void unit_write(entity_pool_t *pool, i64 unit, i16 target, FILE *out) {
- assert(pool != NULL);
- assert(pool->entities[unit].is_enabled);
- assert(pool->entities[unit].unit.entry_point != UNDEFINED);
- assert(out != NULL);
- assert(target == (FORMAT_ELF | ARCH_X86_64));
-
- printf("Writing ELF x86_64 executable...\n");
-
- u16 ehs = 64;
- u16 shs = 0;
- u16 phs = 56;
- u64 align = 8;
-
- u8 code[16] = {
- 0xb8, // mov rax
- 0x3c, 0, 0, 0, // 60 // exit
- 0x48, 0x31, 0xff, // xor rdx, rdx // rdx = 0
- 0x0f, 0x05, // syscall
- };
-
- u64 code_offset = (ehs + phs + (align - 1)) & (~(align - 1));
- u64 code_size = sizeof code;
-
- assert((code_offset % align) == 0);
- assert((code_size % align) == 0);
-
- u64 entry = 0x400000 + ehs + phs;
-
- // ELF header
- //
-
- #define WRITE_V(...) fwrite(&(u8[]) {__VA_ARGS__}, 1, sizeof((u8[]) {__VA_ARGS__}), out)
- #define WRITE_DUP(x, n) fwrite( (u8[n]) { 0 }, 1, n, out)
- #define WRITE_2(x) fwrite(&(u16) { x }, 2, 1, out)
- #define WRITE_4(x) fwrite(&(u32) { x }, 4, 1, out)
- #define WRITE_8(x) fwrite(&(u64) { x }, 8, 1, out)
-
- WRITE_V( 0x7f, 'E', 'L', 'F' ); // magic
-
- WRITE_V( 2 ); // elf64
- WRITE_V( 1 ); // 2's complement, little endian
- WRITE_V( 1 ); // current version
- WRITE_V( 0 ); // ABI = SysV
- WRITE_V( 0 ); // ABI version
-
- WRITE_DUP(0, 7); // padding
-
- WRITE_2( 2 ); // executable
- WRITE_2( 62 ); // x86_64
- WRITE_4( 1 ); // current version
- WRITE_8( entry ); // entry point address
- WRITE_8( ehs ); // program header offset
- WRITE_8( 0 ); // section header offset
- WRITE_4( 0 ); // flags
- WRITE_2( ehs ); // ELF header size
- WRITE_2( phs ); // program header size
- WRITE_2( 1 ); // program header count
- WRITE_2( shs ); // section header size
- WRITE_2( 0 ); // section header count
- WRITE_2( 0 ); // string table section header index
-
- // Program header
- //
-
- WRITE_4( 1 ); // type (PT_LOAD)
- WRITE_4( 5 ); // flags (PF_X | PF_R)
- WRITE_8( code_offset ); // offset
- WRITE_8( entry ); // vaddr
- WRITE_8( entry ); // paddr
- WRITE_8( code_size ); // filesz
- WRITE_8( code_size ); // memsz
- WRITE_8( 8 ); // align
-
- #undef WRITE_V
- #undef WRITE_DUP
- #undef WRITE_32
- #undef WRITE_64
-
- // Code
- //
-
- for (i64 i = code_offset - ehs - phs; i > 0; --i)
- fwrite(&(u8) { 0 }, 1, 1, out);
-
- fwrite(code, 1, code_size, out);
-}
-
-// ================================================================
-//
-// Helpers
-//
-// ================================================================
-
-// Global state
-//
-
-static entity_pool_t g_pool = { 0 };
-
-// Handy procedures
-//
-
-i64 n_i64(i64 value) {
- return node_op_i64(&g_pool, value);
-}
-
-i64 n_ret(i64 node_return_value) {
- return node_op_ret(&g_pool, node_return_value);
-}
-
-i64 p_new(c8 const *name) {
- i64 p = proc_init(&g_pool);
- proc_set_name(&g_pool, p, strlen(name), name);
- return p;
-}
-
-void p_add(i64 proc, i64 node) {
- proc_node_add(&g_pool, proc, node);
-}
-
-i64 u_new() {
- return unit_init(&g_pool);
-}
-
-void u_add(i64 unit, i64 proc) {
- unit_proc_add(&g_pool, unit, proc);
-}
-
-void u_entry_point(i64 unit, i64 proc) {
- unit_set_entry_point(&g_pool, unit, proc);
-}
-
-void u_elf_x86_64(i64 unit, c8 const *output_file_name) {
- FILE *f = fopen(output_file_name, "wb");
- assert(f != NULL);
- unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, f);
-#ifdef __unix__
- fchmod(fileno(f), 0775);
-#endif
- fclose(f);
-}
-
-// ================================================================
-//
-// Example
-//
-// ================================================================
-
-int main(int argc, char **argv) {
- printf("node - %d bytes\n", (i32) sizeof(node_t));
- printf("proc - %d bytes\n", (i32) sizeof(proc_t));
- printf("unit - %d bytes\n", (i32) sizeof(unit_t));
-
- i64 main = p_new("main");
- i64 n0 = n_i64(42);
- p_add(main, n0);
- p_add(main, n_ret(n0));
-
- i64 u = u_new();
- u_add(u, main);
- u_entry_point(u, main);
- u_elf_x86_64(u, "test_foo");
-
- printf("\nBye!\n");
- return 0;
-}