From 9018ec15f750aa01ff090c9e2d80da6aea2e70dd Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Fri, 12 Jul 2024 02:21:58 +0200 Subject: Cleanup; Update TODO --- bxgen.c | 304 ++++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 164 insertions(+), 140 deletions(-) (limited to 'bxgen.c') diff --git a/bxgen.c b/bxgen.c index aa51e9c..1514508 100755 --- a/bxgen.c +++ b/bxgen.c @@ -37,11 +37,10 @@ exit $? // - ELF + x86_64 executable // - x86_64 object file // - Linking libraries -// - String table for names +// - String table for names and arrays // - Proper error handling // - Proper prefixes for identifiers // - Effective entity allocation -// - Hybrid-linked lists for large entities // - Implicit procedure prototypes // - Implicit exit after ret from entry point // - Static single-assignment @@ -157,18 +156,12 @@ enum { // Entity types // - ENTITY_TAIL = 0, - ENTITY_NODE, + ENTITY_NODE = 0, ENTITY_PROC, ENTITY_UNIT, // Limits // - // NOTE - // - // All limits can be exceeded using the linked list of entities - // (see `Entity::tail`), except for `MAX_ENTITY_COUNT`. - // MAX_LITERAL_SIZE = 400, MAX_NAME_SIZE = 80, @@ -220,7 +213,6 @@ typedef struct { // A semantic node is an operation with optional data // and possible references to other nodes. -// typedef struct { i16 op; @@ -235,7 +227,6 @@ typedef struct { // A procedure is a collection of semantic nodes // and has a string name. -// typedef struct { i16 convention; @@ -269,23 +260,14 @@ typedef struct { // // Every entity can be referenced by it's unique index // in the entity pool. -// -// If the entity's data doesn't fit in one entity, tail is -// an index that leads to the entity with the rest of the -// data, forming a linked list. 
-// typedef struct { b8 is_enabled; i16 type; - i64 tail; union { Node node; Proc proc; Unit unit; - c8 tail_chars[1]; - i64 tail_ids[1]; - Var tail_vars[1]; }; } Entity; @@ -293,7 +275,6 @@ typedef struct { // // NOTE // We use one single large memory block for *everything*. -// typedef struct { i64 entity_count; @@ -341,7 +322,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_id, void *io_user_data) i64 io_open_read(i64 name_size, c8 *name, void *user_data); i64 io_open_write(i64 name_size, c8 *name, void *user_data); void io_close(i64 f, void *user_data); -i64 io_seek(i64 f, i64 offset, u16 origin, void *user_data); +b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data); i64 io_read(i64 f, i64 size, void *data, void *user_data); i64 io_write(i64 f, i64 size, void *data, void *user_data); void io_chmod_exe(i64 f, void *user_data); @@ -448,7 +429,6 @@ i64 node_init(Pool *pool, Node data) { return pool_add(pool, (Entity) { .type = ENTITY_NODE, - .tail = UNDEFINED, .node = data, }); } @@ -519,7 +499,6 @@ i64 node_ctrl_ret(Pool *pool, i64 value_count, Var *values) { i64 proc_init(Pool *pool) { return pool_add(pool, (Entity) { .type = ENTITY_PROC, - .tail = UNDEFINED, .proc = (Proc) { .ret_index = UNDEFINED, .index_in_unit = UNDEFINED, @@ -544,9 +523,6 @@ void proc_set_name(Pool *pool, i64 proc, i64 name_size, c8 *name) { BX_ASSERT(pool->entities[proc].is_enabled); BX_ASSERT(pool->entities[proc].type == ENTITY_PROC); - // TODO - // Implement large entities. - BX_ASSERT(name_size <= MAX_NAME_SIZE); BX_ASSERT(name_size >= 0); @@ -569,9 +545,6 @@ void proc_node_add(Pool *pool, i64 proc, i64 node) { BX_ASSERT(n->index_in_proc == UNDEFINED); - // TODO - // Implement large entities. - i64 index = p->node_count; if (n->op == CTRL_RET) @@ -599,9 +572,6 @@ void proc_node_remove(Pool *pool, i64 proc, i64 node) { Proc *p = &pool->entities[proc].proc; Node *n = &pool->entities[node].node; - // TODO - // Implement large entities. 
- BX_ASSERT(n->index_in_proc != UNDEFINED); BX_ASSERT(p->nodes[n->index_in_proc] == node); @@ -617,7 +587,6 @@ void proc_node_remove(Pool *pool, i64 proc, i64 node) { i64 unit_init(Pool *pool, i16 type) { return pool_add(pool, (Entity) { .type = ENTITY_UNIT, - .tail = UNDEFINED, .unit = (Unit) { .type = type, .entry_point_index = UNDEFINED, @@ -641,9 +610,6 @@ void unit_proc_add(Pool *pool, i64 unit, i64 proc) { BX_ASSERT(p->index_in_unit == UNDEFINED); - // TODO - // Implement large entities. - i64 index = u->proc_count; BX_ASSERT(index < MAX_PROC_COUNT); @@ -662,9 +628,6 @@ void unit_proc_remove(Pool *pool, i64 unit, i64 proc) { Unit *u = &pool->entities[unit].unit; Proc *p = &pool->entities[proc].proc; - // TODO - // Implement large entities. - BX_ASSERT(p->index_in_unit != UNDEFINED); BX_ASSERT(u->procs[p->index_in_unit] == proc); @@ -714,9 +677,6 @@ void unit_set_name(Pool *pool, i64 unit, i64 name_size, c8 *name) { BX_ASSERT(pool->entities[unit].is_enabled); BX_ASSERT(pool->entities[unit].type == ENTITY_UNIT); - // TODO - // Implement large entities. 
- BX_ASSERT(name_size <= MAX_NAME_SIZE); BX_ASSERT(name_size >= 0); @@ -756,6 +716,48 @@ void unit_set_entry_point(Pool *pool, i64 unit, i64 entry_point_proc) { #include // TEMP #include // TEMP +enum { + MAX_SECTION_SIZE = 1024 * 1024 * 10, + MAX_INPUT_SIZE = 1024 * 1024 * 100, + MAX_RELOCATIONS = 1024 * 10, + MAX_INPUT_SECTIONS = 1024 * 10, +}; + +typedef struct { + i64 memory_address; + i64 file_offset; + i64 size; + u8 bytes[MAX_SECTION_SIZE]; +} Section; + +typedef struct { + i64 src_section; + i64 src_file_offset; + i64 src_size; + i64 dst_section; + i64 dst_memory_address; +} Relocation; + +typedef struct { + i64 object_index; + i64 section_index; + i64 file_offset; + i64 size; +} Input_Section_Info; + +typedef struct { + Section exec; + Section read_only; + Section read_write; + Section zero_init; + i64 relocs_size; + Relocation relocs[MAX_RELOCATIONS]; + i64 input_raw_size; + u8 input_raw[MAX_INPUT_SIZE]; + i64 input_sections_size; + Input_Section_Info input_sections[MAX_INPUT_SECTIONS]; +} Binary_Output; + void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data) { // Docs // @@ -765,7 +767,6 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data // LLVM impl https://github.com/llvm/llvm-project/blob/main/lld/ELF/Driver.cpp#L2822 // https://github.com/llvm/llvm-project/blob/main/lld/ELF/Writer.cpp#L304 // https://github.com/llvm/llvm-project/blob/main/lld/ELF/OutputSections.cpp#L469 - // BX_ASSERT(pool != NULL && pool->entities != NULL); BX_ASSERT(pool->entities[unit].is_enabled); @@ -845,10 +846,10 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data WRITE_4( 1 ); // type (PT_LOAD) WRITE_4( 5 ); // flags (PF_X | PF_R) WRITE_8( code_offset ); // offset - WRITE_8( code_address ); // vaddr - WRITE_8( code_address ); // paddr - WRITE_8( code_size ); // filesz - WRITE_8( code_size ); // memsz + WRITE_8( code_address ); // virtual address + WRITE_8( code_address ); // physical
address + WRITE_8( code_size ); // size in file + WRITE_8( code_size ); // size in memory WRITE_8( 8 ); // align // Code @@ -890,8 +891,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data i64 n = 0, current_offset = 0; - // Read AR + // ================================================================ // + // Read AR library u8 magic[8]; @@ -1043,8 +1045,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data printf(" \n"); } - // Decode ELF object file + // ================================================================ // + // Decode ELF object file i64 byte_count = atoi(size); if ((byte_count & 1) == 1) @@ -1294,102 +1297,119 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data printf("\n"); - if (type == 2) { - // Find symbol addresses + switch (type) { + // ================================================================ // + // Symbols - BX_ASSERT(entsize == 24); + case 2: { + // Find symbol addresses + // - i64 prev_offset = current_offset; - i64 prev_byte_count = byte_count; - io_seek(f, - begin_offset + offset, - IO_SEEK_BEGIN, io_user_data); - current_offset = begin_offset + offset; + BX_ASSERT(entsize == 24); - printf("\n"); + i64 prev_offset = current_offset; + i64 prev_byte_count = byte_count; + io_seek(f, + begin_offset + offset, + IO_SEEK_BEGIN, io_user_data); + current_offset = begin_offset + offset; + + printf("\n"); - for (byte_count = size; byte_count > 0;) { - BX_ASSERT(symbol_names_found); - - u32 sym_name; - u8 sym_info; - u8 sym_other; - u16 sym_shndx; - u64 sym_value; - u64 sym_size; - - READ(sym_name); BX_ASSERT(n != 0); - READ(sym_info); BX_ASSERT(n != 0); - READ(sym_other); BX_ASSERT(n != 0); - READ(sym_shndx); BX_ASSERT(n != 0); - READ(sym_value); BX_ASSERT(n != 0); - READ(sym_size); BX_ASSERT(n != 0); - - printf(" "); - - if (sym_name != 0) { - if (sym_name < symbol_names_size) { - // Search for the symbol name in the string table - // - - i64 
prev_offset = current_offset; - io_seek(f, - begin_offset + symbol_names_offset + sym_name, - IO_SEEK_BEGIN, io_user_data); - - i32 padding = 48; - - if ((sym_info & 0xf) == 1 || - (sym_info & 0xf) == 2) - printf("%s", "\x1b[32m"); - - printf("\""); - - for (;; --padding) { - c8 c; - n = io_read(f, 1, &c, io_user_data); if (n == 0) break; - if (c == '\0') - break; - printf("%c", c); - } - - printf("\""); - if ((sym_info & 0xf) == 1 || - (sym_info & 0xf) == 2) - printf("%s", "\x1b[37m"); - if (padding > 0) - printf("%*s", padding, ""); - - io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data); - current_offset = prev_offset; + for (byte_count = size; byte_count > 0;) { + BX_ASSERT(symbol_names_found); + + u32 sym_name; + u8 sym_info; + u8 sym_other; + u16 sym_shndx; + u64 sym_value; + u64 sym_size; + + READ(sym_name); BX_ASSERT(n != 0); + READ(sym_info); BX_ASSERT(n != 0); + READ(sym_other); BX_ASSERT(n != 0); + READ(sym_shndx); BX_ASSERT(n != 0); + READ(sym_value); BX_ASSERT(n != 0); + READ(sym_size); BX_ASSERT(n != 0); + + printf(" "); + + if (sym_name != 0) { + if (sym_name < symbol_names_size) { + // Search for the symbol name in the string table + // + + i64 prev_offset = current_offset; + io_seek(f, + begin_offset + symbol_names_offset + sym_name, + IO_SEEK_BEGIN, io_user_data); + + i32 padding = 48; + + if ((sym_info & 0xf) == 1 || + (sym_info & 0xf) == 2) + printf("%s", "\x1b[32m"); + + printf("\""); + + for (;; --padding) { + c8 c; + n = io_read(f, 1, &c, io_user_data); if (n == 0) break; + if (c == '\0') + break; + printf("%c", c); + } + + printf("\""); + if ((sym_info & 0xf) == 1 || + (sym_info & 0xf) == 2) + printf("%s", "\x1b[37m"); + if (padding > 0) + printf("%*s", padding, ""); + + io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data); + current_offset = prev_offset; + } else + printf("%-50d", sym_name); } else - printf("%-50d", sym_name); - } else - printf("%*s", 50, ""); - - printf("%08llx ", sym_value); - - printf("%-8s ", - (sym_info & 0xf) <= 4 ? 
(c8 const *[]) { - "No type", - "Data", - "Func", - "Section", - "File", - }[sym_info & 0xf] : "" - ); - - if (sym_size != 0) - printf("- %lld bytes", sym_size); + printf("%*s", 50, ""); + + printf("%08llx ", sym_value); // symbol address + + printf("%-8s ", + (sym_info & 0xf) <= 4 ? (c8 const *[]) { + "No type", + "Data", + "Func", + "Section", + "File", + }[sym_info & 0xf] : "" + ); + + if (sym_size != 0) + printf("- %lld bytes", sym_size); + printf("\n"); + } + printf("\n"); - } - printf("\n"); + io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data); + current_offset = prev_offset; + byte_count = prev_byte_count; + } break; - io_seek(f, prev_offset, IO_SEEK_BEGIN, io_user_data); - current_offset = prev_offset; - byte_count = prev_byte_count; + // ================================================================ + // + // Relocations with addends + + case 4: { + } break; + + // ================================================================ + + default:; } } @@ -1399,6 +1419,8 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data current_offset += byte_count; #undef READ + + // ================================================================ } } @@ -1408,6 +1430,8 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data "", symbols[symbol_index]); + // ================================================================ + io_close(f, io_user_data); } } @@ -1429,9 +1453,9 @@ void io_close(i64 f, void *user_data) { io_dispatch(IO_CLOSE, &f, NULL, NULL, user_data); } -i64 io_seek(i64 f, i64 offset, u16 origin, void *user_data) { +b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data) { io_dispatch(IO_SEEK, &f, &offset, &origin, user_data); - return offset; + return 1; } i64 io_read(i64 f, i64 size, void *data, void *user_data) { @@ -1564,7 +1588,7 @@ static Pool g_pool = { // // TODO // Reallocate the memory block when necessary.
- // + .capacity = MAX_ENTITY_COUNT, .entities = (Entity[MAX_ENTITY_COUNT]) { 0 }, }; -- cgit v1.2.3