summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitya Selivanov <automainint@guattari.tech>2024-07-12 02:21:58 +0200
committerMitya Selivanov <automainint@guattari.tech>2024-07-12 02:21:58 +0200
commit9018ec15f750aa01ff090c9e2d80da6aea2e70dd (patch)
tree908ef5e0b1064da229d991940fa7a33c5ca76702
parented26b0f2c95215e231e440475523f2197e6c8e3b (diff)
downloadbxgen-9018ec15f750aa01ff090c9e2d80da6aea2e70dd.zip
Cleanup; Update TODO
-rwxr-xr-xbxgen.c304
1 files changed, 164 insertions, 140 deletions
diff --git a/bxgen.c b/bxgen.c
index aa51e9c..1514508 100755
--- a/bxgen.c
+++ b/bxgen.c
@@ -37,11 +37,10 @@ exit $?
// - ELF + x86_64 executable
// - x86_64 object file
// - Linking libraries
-// - String table for names
+// - String table for names and arrays
// - Proper error handling
// - Proper prefixes for identifiers
// - Effective entity allocation
-// - Hybrid-linked lists for large entities
// - Implicit procedure prototypes
// - Implicit exit after ret from entry point
// - Static single-assignment
@@ -157,18 +156,12 @@ enum {
// Entity types
//
- ENTITY_TAIL = 0,
- ENTITY_NODE,
+ ENTITY_NODE = 0,
ENTITY_PROC,
ENTITY_UNIT,
// Limits
//
- // NOTE
- //
- // All limits can be exceeded using the linked list of entities
- // (see `Entity::tail`), except for `MAX_ENTITY_COUNT`.
- //
MAX_LITERAL_SIZE = 400,
MAX_NAME_SIZE = 80,
@@ -220,7 +213,6 @@ typedef struct {
// A semantic node is an operation with optional data
// and possible references to other nodes.
-//
typedef struct {
i16 op;
@@ -235,7 +227,6 @@ typedef struct {
// A procedure is a collection of semantic nodes
// and has a string name.
-//
typedef struct {
i16 convention;
@@ -269,23 +260,14 @@ typedef struct {
//
// Every entity can be referenced by its unique index
// in the entity pool.
-//
-// If the entity's data doesn't fit in one entity, tail is
-// an index that leads to the entity with the rest of the
-// data, forming a linked list.
-//
typedef struct {
b8 is_enabled;
i16 type;
- i64 tail;
union {
Node node;
Proc proc;
Unit unit;
- c8 tail_chars[1];
- i64 tail_ids[1];
- Var tail_vars[1];
};
} Entity;
@@ -293,7 +275,6 @@ typedef struct {
//
// NOTE
// We use one single large memory block for *everything*.
-//
typedef struct {
i64 entity_count;
@@ -341,7 +322,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_id, void *io_user_data)
i64 io_open_read(i64 name_size, c8 *name, void *user_data);
i64 io_open_write(i64 name_size, c8 *name, void *user_data);
void io_close(i64 f, void *user_data);
-i64 io_seek(i64 f, i64 offset, u16 origin, void *user_data);
+b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data);
i64 io_read(i64 f, i64 size, void *data, void *user_data);
i64 io_write(i64 f, i64 size, void *data, void *user_data);
void io_chmod_exe(i64 f, void *user_data);
@@ -448,7 +429,6 @@ i64 node_init(Pool *pool, Node data) {
return pool_add(pool, (Entity) {
.type = ENTITY_NODE,
- .tail = UNDEFINED,
.node = data,
});
}
@@ -519,7 +499,6 @@ i64 node_ctrl_ret(Pool *pool, i64 value_count, Var *values) {
i64 proc_init(Pool *pool) {
return pool_add(pool, (Entity) {
.type = ENTITY_PROC,
- .tail = UNDEFINED,
.proc = (Proc) {
.ret_index = UNDEFINED,
.index_in_unit = UNDEFINED,
@@ -544,9 +523,6 @@ void proc_set_name(Pool *pool, i64 proc, i64 name_size, c8 *name) {
BX_ASSERT(pool->entities[proc].is_enabled);
BX_ASSERT(pool->entities[proc].type == ENTITY_PROC);
- // TODO
- // Implement large entities.
-
BX_ASSERT(name_size <= MAX_NAME_SIZE);
BX_ASSERT(name_size >= 0);
@@ -569,9 +545,6 @@ void proc_node_add(Pool *pool, i64 proc, i64 node) {
BX_ASSERT(n->index_in_proc == UNDEFINED);
- // TODO
- // Implement large entities.
-
i64 index = p->node_count;
if (n->op == CTRL_RET)
@@ -599,9 +572,6 @@ void proc_node_remove(Pool *pool, i64 proc, i64 node) {
Proc *p = &pool->entities[proc].proc;
Node *n = &pool->entities[node].node;
- // TODO
- // Implement large entities.
-
BX_ASSERT(n->index_in_proc != UNDEFINED);
BX_ASSERT(p->nodes[n->index_in_proc] == node);
@@ -617,7 +587,6 @@ void proc_node_remove(Pool *pool, i64 proc, i64 node) {
i64 unit_init(Pool *pool, i16 type) {
return pool_add(pool, (Entity) {
.type = ENTITY_UNIT,
- .tail = UNDEFINED,
.unit = (Unit) {
.type = type,
.entry_point_index = UNDEFINED,
@@ -641,9 +610,6 @@ void unit_proc_add(Pool *pool, i64 unit, i64 proc) {
BX_ASSERT(p->index_in_unit == UNDEFINED);
- // TODO
- // Implement large entities.
-
i64 index = u->proc_count;
BX_ASSERT(index < MAX_PROC_COUNT);
@@ -662,9 +628,6 @@ void unit_proc_remove(Pool *pool, i64 unit, i64 proc) {
Unit *u = &pool->entities[unit].unit;
Proc *p = &pool->entities[proc].proc;
- // TODO
- // Implement large entities.
-
BX_ASSERT(p->index_in_unit != UNDEFINED);
BX_ASSERT(u->procs[p->index_in_unit] == proc);
@@ -714,9 +677,6 @@ void unit_set_name(Pool *pool, i64 unit, i64 name_size, c8 *name) {
BX_ASSERT(pool->entities[unit].is_enabled);
BX_ASSERT(pool->entities[unit].type == ENTITY_UNIT);
- // TODO
- // Implement large entities.
-
BX_ASSERT(name_size <= MAX_NAME_SIZE);
BX_ASSERT(name_size >= 0);
@@ -756,6 +716,48 @@ void unit_set_entry_point(Pool *pool, i64 unit, i64 entry_point_proc) {
#include <stdio.h> // TEMP
#include <stdlib.h> // TEMP
+enum {
+ MAX_SECTION_SIZE = 1024 * 1024 * 10,
+ MAX_INPUT_SIZE = 1024 * 1024 * 100,
+ MAX_RELOCATIONS = 1024 * 10,
+ MAX_INPUT_SECTIONS = 1024 * 10,
+};
+
+typedef struct {
+ i64 memory_address;
+ i64 file_offset;
+ i64 size;
+ u8 bytes[MAX_SECTION_SIZE];
+} Section;
+
+typedef struct {
+ i64 src_section;
+ i64 src_file_offset;
+ i64 src_size;
+ i64 dst_section;
+ i64 dst_memory_address;
+} Relocation;
+
+typedef struct {
+ i64 object_index;
+ i64 section_index;
+ i64 file_offset;
+ i64 size;
+} Input_Section_Info;
+
+typedef struct {
+ Section exec;
+ Section read_only;
+ Section read_write;
+ Section zero_init;
+ i64 relocs_size;
+ Relocation relocs[MAX_RELOCATIONS];
+ i64 input_raw_size;
+ u8 input_raw[MAX_INPUT_SIZE];
+ i64 input_sections_size;
+ Input_Section_Info input_sections[MAX_INPUT_SECTIONS];
+} Binary_Output;
+
void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data) {
// Docs
//
@@ -765,7 +767,6 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
// LLVM impl https://github.com/llvm/llvm-project/blob/main/lld/ELF/Driver.cpp#L2822
// https://github.com/llvm/llvm-project/blob/main/lld/ELF/Writer.cpp#L304
// https://github.com/llvm/llvm-project/blob/main/lld/ELF/OutputSections.cpp#L469
- //
BX_ASSERT(pool != NULL && pool->entities != NULL);
BX_ASSERT(pool->entities[unit].is_enabled);
@@ -845,10 +846,10 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
WRITE_4( 1 ); // type (PT_LOAD)
WRITE_4( 5 ); // flags (PF_X | PF_R)
WRITE_8( code_offset ); // offset
- WRITE_8( code_address ); // vaddr
- WRITE_8( code_address ); // paddr
- WRITE_8( code_size ); // filesz
- WRITE_8( code_size ); // memsz
+ WRITE_8( code_address ); // virtual address
+ WRITE_8( code_address ); // physical address
+ WRITE_8( code_size ); // size in file
+ WRITE_8( code_size ); // size in memory
WRITE_8( 8 ); // align
// Code
@@ -890,8 +891,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
i64 n = 0, current_offset = 0;
- // Read AR
+ // ================================================================
//
+ // Read AR library
u8 magic[8];
@@ -1043,8 +1045,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
printf(" <SYMBOLS NOT FOUND>\n");
}
- // Decode ELF object file
+ // ================================================================
//
+ // Decode ELF object file
i64 byte_count = atoi(size);
if ((byte_count & 1) == 1)
@@ -1294,102 +1297,119 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
printf("\n");
- if (type == 2) {
- // Find symbol addresses
+ switch (type) {
+ // ================================================================
//
+ // Symbols
- BX_ASSERT(entsize == 24);
+ case 2: {
+ // Find symbol addresses
+ //
- i64 prev_offset = current_offset;
- i64 prev_byte_count = byte_count;
- io_seek(f,
- begin_offset + offset,
- IO_SEEK_BEGIN, io_user_data);
- current_offset = begin_offset + offset;
+ BX_ASSERT(entsize == 24);
- printf("\n");
+ i64 prev_offset = current_offset;
+ i64 prev_byte_count = byte_count;
+ io_seek(f,
+ begin_offset + offset,
+ IO_SEEK_BEGIN, io_user_data);
+ current_offset = begin_offset + offset;
+
+ printf("\n");
- for (byte_count = size; byte_count > 0;) {
- BX_ASSERT(symbol_names_found);
-
- u32 sym_name;
- u8 sym_info;
- u8 sym_other;
- u16 sym_shndx;
- u64 sym_value;
- u64 sym_size;
-
- READ(sym_name); BX_ASSERT(n != 0);
- READ(sym_info); BX_ASSERT(n != 0);
- READ(sym_other); BX_ASSERT(n != 0);
- READ(sym_shndx); BX_ASSERT(n != 0);
- READ(sym_value); BX_ASSERT(n != 0);
- READ(sym_size); BX_ASSERT(n != 0);
-
- printf(" ");
-
- if (sym_name != 0) {
- if (sym_name < symbol_names_size) {
- // Search for the symbol name in the string table
- //
-
- i64 prev_offset = current_offset;
- io_seek(f,
- begin_offset + symbol_names_offset + sym_name,
- IO_SEEK_BEGIN, io_user_data);
-
- i32 padding = 48;
-
- if ((sym_info & 0xf) == 1 ||
- (sym_info & 0xf) == 2)
- printf("%s", "\x1b[32m");
-
- printf("\"");
-
- for (;; --padding) {
- c8 c;
- n = io_read(f, 1, &c, io_user_data); if (n == 0) break;
- if (c == '\0')
- break;
- printf("%c", c);
- }
-
- printf("\"");
- if ((sym_info & 0xf) == 1 ||
- (sym_info & 0xf) == 2)
- printf("%s", "\x1b[37m");
- if (padding > 0)
- printf("%*s", padding, "");
-
- io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data);
- current_offset = prev_offset;
+ for (byte_count = size; byte_count > 0;) {
+ BX_ASSERT(symbol_names_found);
+
+ u32 sym_name;
+ u8 sym_info;
+ u8 sym_other;
+ u16 sym_shndx;
+ u64 sym_value;
+ u64 sym_size;
+
+ READ(sym_name); BX_ASSERT(n != 0);
+ READ(sym_info); BX_ASSERT(n != 0);
+ READ(sym_other); BX_ASSERT(n != 0);
+ READ(sym_shndx); BX_ASSERT(n != 0);
+ READ(sym_value); BX_ASSERT(n != 0);
+ READ(sym_size); BX_ASSERT(n != 0);
+
+ printf(" ");
+
+ if (sym_name != 0) {
+ if (sym_name < symbol_names_size) {
+ // Search for the symbol name in the string table
+ //
+
+ i64 prev_offset = current_offset;
+ io_seek(f,
+ begin_offset + symbol_names_offset + sym_name,
+ IO_SEEK_BEGIN, io_user_data);
+
+ i32 padding = 48;
+
+ if ((sym_info & 0xf) == 1 ||
+ (sym_info & 0xf) == 2)
+ printf("%s", "\x1b[32m");
+
+ printf("\"");
+
+ for (;; --padding) {
+ c8 c;
+ n = io_read(f, 1, &c, io_user_data); if (n == 0) break;
+ if (c == '\0')
+ break;
+ printf("%c", c);
+ }
+
+ printf("\"");
+ if ((sym_info & 0xf) == 1 ||
+ (sym_info & 0xf) == 2)
+ printf("%s", "\x1b[37m");
+ if (padding > 0)
+ printf("%*s", padding, "");
+
+ io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data);
+ current_offset = prev_offset;
+ } else
+ printf("%-50d", sym_name);
} else
- printf("%-50d", sym_name);
- } else
- printf("%*s", 50, "");
-
- printf("%08llx ", sym_value);
-
- printf("%-8s ",
- (sym_info & 0xf) <= 4 ? (c8 const *[]) {
- "No type",
- "Data",
- "Func",
- "Section",
- "File",
- }[sym_info & 0xf] : ""
- );
-
- if (sym_size != 0)
- printf("- %lld bytes", sym_size);
+ printf("%*s", 50, "");
+
+ printf("%08llx ", sym_value); // symbol address
+
+ printf("%-8s ",
+ (sym_info & 0xf) <= 4 ? (c8 const *[]) {
+ "No type",
+ "Data",
+ "Func",
+ "Section",
+ "File",
+ }[sym_info & 0xf] : ""
+ );
+
+ if (sym_size != 0)
+ printf("- %lld bytes", sym_size);
+ printf("\n");
+ }
+
printf("\n");
- }
- printf("\n");
+ io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data);
+ current_offset = prev_offset;
+ byte_count = prev_byte_count;
+ } break;
- io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data);
- current_offset = prev_offset;
- byte_count = prev_byte_count;
+ // ================================================================
+ //
+ // Relocations with addends
+
+ case 4: {
+ } break;
+
+ // ================================================================
+
+ default:;
}
}
@@ -1399,6 +1419,8 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
current_offset += byte_count;
#undef READ
+
+ // ================================================================
}
}
@@ -1408,6 +1430,8 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
"<FILE NOT FOUND>",
symbols[symbol_index]);
+ // ================================================================
+
io_close(f, io_user_data);
}
}
@@ -1429,9 +1453,9 @@ void io_close(i64 f, void *user_data) {
io_dispatch(IO_CLOSE, &f, NULL, NULL, user_data);
}
-i64 io_seek(i64 f, i64 offset, u16 origin, void *user_data) {
+b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data) {
io_dispatch(IO_SEEK, &f, &offset, &origin, user_data);
- return offset;
+ return 1;
}
i64 io_read(i64 f, i64 size, void *data, void *user_data) {
@@ -1564,7 +1588,7 @@ static Pool g_pool = {
//
// TODO
// Reallocate the memory block when necessary.
- //
+
.capacity = MAX_ENTITY_COUNT,
.entities = (Entity[MAX_ENTITY_COUNT]) { 0 },
};