diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2024-08-01 06:47:59 +0200 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2024-08-01 06:47:59 +0200 |
commit | 4bc7fe83b94e400ac190a6140ce9792a1c1ba106 (patch) | |
tree | 7cd1464174d57361f0ecbf06f90f54eff37b5192 | |
parent | de0850e7f8e62e4b3873e63aee10d9253b006c5f (diff) | |
download | bxgen-4bc7fe83b94e400ac190a6140ce9792a1c1ba106.zip |
Global offset table
-rwxr-xr-x | bxgen.c | 631 |
1 files changed, 265 insertions, 366 deletions
@@ -27,9 +27,11 @@ #/ #/ To-Do list #/ +#/ - Simplify +#/ - Use 0 for UNDEFINED. Make the zero value useful +#/ - Factor out checks #/ - Library #/ - Terminal color option -#/ - Use 0 for UNDEFINED. Make the zero value useful #/ - String table for names and arrays #/ - Proper prefixes for identifiers #/ - Effective entity allocation @@ -84,8 +86,7 @@ #/ #/ Bugs #/ -#/ - Segfault at 0x58fe2d (after 0x401c3f, 0x58fe2d) -#/ - in 0x58fabc +#/ - Segfault at 0x45bb82 #/ #/ Done features #/ @@ -195,6 +196,7 @@ enum { MAX_NUM_SECTIONS = 2 * 1024 * 1024, MAX_NUM_SYMBOLS = 2 * 1024 * 1024, MAX_NUM_RELS = 100 * 1024, + MAX_NUM_LINK_RELS = 2 * 1024 * 1024, MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB @@ -433,11 +435,21 @@ typedef struct { } Rel_Entry; typedef struct { + i64 offset; + i64 address; +} Link_Sec_Entry; + +typedef struct { i64 name_size; c8 *name; i64 address; i64 size; -} Symbol_Entry; + i64 got_offset; +} Link_Sym_Entry; + +typedef struct { + i64 symbol; +} Link_Rel_Entry; // Pool, a collection of all entities. // @@ -473,19 +485,20 @@ typedef struct { i64 max_num_obj_files; i64 max_num_sections; i64 max_num_symbols; + i64 max_num_rels; i64 max_not_found_size; i64 max_output_size; i64 num_obj_files; - u8 * obj_file_buffer; - u8 * dependencies_buffer; - i64 * obj_file_offsets; - i64 * section_offsets; - i64 * section_addresses; - Symbol_Entry *symbols; - c8 * not_found_buffer; - u8 * output_buffer; + u8 * obj_file_buffer; + u8 * dependencies_buffer; + i64 * obj_file_offsets; + Link_Sec_Entry *sections; + Link_Sym_Entry *symbols; + Link_Rel_Entry *rels; + c8 * not_found_buffer; + u8 * output_buffer; } Linker_Context; // ================================================================ @@ -695,6 +708,13 @@ b8 mem_eq(void *a, void *b, i64 size) { return 1; } +b8 str_eq(i64 a_len, c8 *a, i64 b_len, c8 *b) { + return a_len == b_len && mem_eq(a, b, a_len); +} + +#define STR_EQ(z, a, b) \ + (z == sizeof(b) - 1 && mem_eq((a), (b), sizeof(b) - 1)) + i64 str_len(c8 *s, c8 *s_end) { CHECK(s < s_end, "Buffer overflow", 0); @@ -751,7 +771,7 @@ c8 *find_str_in_table(c8 *buf, c8 *buf_end, c8 *sub, c8 *sub_end) { while (buf < buf_end) { i64 len = str_len(buf, buf_end); - if (sub_end - sub == len && mem_eq(buf, sub, len)) + if (str_eq(len, buf, sub_end - sub, sub)) return buf; buf += len + 1; } @@ -2050,7 +2070,7 @@ void x86_64_emit_node( write_u8(LE, 0xc4, begin + 84, end); // write_u8(LE, 16, begin + 85, end); // 16 - codegen->offset_code += 86; + codegen->offset_code += 86; codegen->offset_ro_data += dat_2->lit.num_bytes; } break; @@ -2294,8 +2314,7 @@ i64 elf_find_section_index_by_name( i64 name_index = (i64) read_u32(LE, begin, b.end); Offset_Size s = elf_name_in_string_table(b, names, name_index); - if (s.size == name_size && - mem_eq(b.begin + s.offset, name, name_size)) + if (str_eq(s.size, (c8 *) b.begin + s.offset, name_size, name)) return i; } @@ -2488,7 +2507,7 @@ Elf_Symbol_Entry elf_find_symbol_by_name( CHECK(b.begin + sym.name.offset + name_size <= b.end, "Buffer overflow", (Elf_Symbol_Entry) {0}); CHECK(sym.name.offset + name_size <= b.elf.size, "Buffer overflow", (Elf_Symbol_Entry) {0}); - if (name_size == sym.name.size && mem_eq(name, b.begin + sym.name.offset, name_size)) + if (str_eq(name_size, name, sym.name.size, (c8 *) b.begin + sym.name.offset)) return sym; } @@ -2501,146 +2520,27 @@ void elf_checks(Buffer_Context b) { u8 osabi = read_u8(LE, begin + 7, end); - CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",); - CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",); - CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",); - CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",); - - CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",); - CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",); - CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",); - CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); - CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",); - CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",); - CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",); - CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",); - - LAX( read_u64(LE, begin + 24, end) == 0, "Invalid entry point"); - LAX( read_u64(LE, begin + 32, end) == 0, "Invalid program header offset"); - LAX( read_u32(LE, begin + 48, end) == 0, "Invalid flags"); - LAX( read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size"); - LAX( read_u16(LE, begin + 54, end) == 0, "Invalid program header size"); - LAX( read_u16(LE, begin + 56, end) == 0, "Invalid num program headers"); - LAX( read_u16(LE, begin + 58, end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size"); -} - -void elf_dump(u32 log_level, Buffer_Context b, b8 term_color) { - Offset_Num headers = elf_section_headers(b); - Offset_Size strtab = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - Offset_Size symtab = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; - - for (i64 sec_index = 1; sec_index < headers.num; ++sec_index) { - Elf_Section_Header section = elf_section(b, sec_index); - - c8 *name = elf_name_from_offset(b, section.name); - - LOG( - log_level, - "\"%s%s%s\"%*s%-14s%s%s%s%s%lld%s", - !term_color ? "" : - section.type == SEC_SYMTAB || - section.type == SEC_RELA || - section.type == SEC_REL ? "\x1b[32m" : - section.alloc ? "\x1b[34m" : - section.type == SEC_STRTAB ? "\x1b[33m" : - "\x1b[31m", - name, - !term_color ? "" : "\x1b[37m", - (i32) (section.name.size < 30 ? 30 - section.name.size : 1), - "", - SEC_TYPE_NAMES[section.type], - section.alloc ? "R" : "_", - section.write ? "W" : "_", - section.exec ? "X" : "_", - section.data.size > 0 ? " - " : "", - section.data.size, - section.data.size > 0 ? " bytes" : "\b " - ); - - switch (section.type) { - case SEC_SYMTAB: - LOG(log_level, " - -"); - - for (i64 sym_index = 1; sym_index < section.num_entries; ++sym_index) { - Elf_Symbol_Entry sym = elf_symbol(b, section.data, strtab, (u16) sym_index); - - c8 *name = elf_name_from_offset(b, sym.name); - - i32 len = (sym.name.size == 0) ? 4 : (i32) sym.name.size; - - LOG( - log_level, - " %08llx %-04llx %s%s%s%s%s %.*s %s%-7s %s%s", - section.data.offset + sym.value.offset, - sym.value.size, - *name != '\0' ? "\"" : "", - !term_color ? "" : - sym.bind == BIND_GLOBAL ? "\x1b[32m" : - sym.bind == BIND_WEAK ? "\x1b[35m" : - "\x1b[31m", - *name != '\0' ? name : "<NONE>", - !term_color ? "" : "\x1b[37m", - *name != '\0' ? "\"" : "", - 31 < len ? 1 : 32 - len, - 31 < len ? " " : "........................................", - !term_color ? "" : - sym.type == SYM_PROC ? "\x1b[32m" : - sym.type == SYM_DATA ? "\x1b[32m" : - sym.type == SYM_COMMON ? "\x1b[33m" : - sym.type == SYM_TLS ? "\x1b[35m" : - sym.type == SYM_SECTION ? "\x1b[31m" : - sym.type == SYM_SPECIFIC ? "\x1b[31m" : - "", - SYM_TYPE_NAMES[sym.type], - !term_color ? (sym.section == 0 ? "undefined" : "") : - sym.section == 0 ? ( - sym.bind == BIND_GLOBAL || sym.bind == BIND_WEAK ? - "\x1b[33mundefined" : "\x1b[31mundefined") : "", - !term_color ? "" : "\x1b[37m" - ); - } - - LOG(log_level, " - -"); - break; - - case SEC_REL: - case SEC_RELA: { - LOG(log_level, " - -"); - - for (i64 relx_index = 0; relx_index < section.num_entries; ++relx_index) { - Elf_Relx_Entry relx = elf_relx(b, symtab, strtab, section.data, relx_index, section.type == SEC_RELA); - - LOG( - log_level, - " %-16s %08llx %-+5lld <= %s%08llx%s%s%s \"%s\"", - REL_NAMES[relx.type], - relx.offset, - relx.addent, - !term_color ? "" : - relx.symbol.bind == BIND_WEAK ? "\x1b[33m" : "\x1b[32m", - relx.symbol.value.offset + elf_section(b, relx.symbol.section).data.offset, - !term_color ? "" : "\x1b[37m", - !term_color ? "" : - relx.symbol.type == SYM_DATA ? " \x1b[34mdata" : - relx.symbol.type == SYM_COMMON ? " \x1b[32mdata" : - relx.symbol.type == SYM_TLS ? " \x1b[34mdata" : - relx.symbol.type == SYM_PROC ? " \x1b[34mproc" : - relx.symbol.type == SYM_SECTION ? " \x1b[36msect" : - relx.symbol.type == SYM_SPECIFIC ? " \x1b[34mspec" : - " \x1b[33mnone", - !term_color ? "" : "\x1b[37m", - elf_name_from_offset(b, relx.symbol.name) - ); - } - - LOG(log_level, " - -"); - } break; - - default:; - } - } - - LOG(log_level, ""); + CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",); + CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",); + CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",); + CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",); + + CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",); + CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",); + CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",); + CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); + CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",); + CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",); + CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",); + CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",); + + LAX(read_u64(LE, begin + 24, end) == 0, "Invalid entry point"); + LAX(read_u64(LE, begin + 32, end) == 0, "Invalid program header offset"); + LAX(read_u32(LE, begin + 48, end) == 0, "Invalid flags"); + LAX(read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size"); + LAX(read_u16(LE, begin + 54, end) == 0, "Invalid program header size"); + LAX(read_u16(LE, begin + 56, end) == 0, "Invalid num program headers"); + LAX(read_u16(LE, begin + 58, end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size"); } i64 unit_write_in_memory( @@ -2662,7 +2562,7 @@ i64 unit_write_in_memory( emit_unit(pool, codegen, unit, arch); u16 num_program_headers = 7; - i64 program_offset = align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_ALIGNMENT); + i64 program_offset = align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_PAGE_SIZE); i64 base_address = X86_64_BASE_ADDRESS; i64 rx_code_address = base_address + program_offset; @@ -2686,7 +2586,6 @@ i64 unit_write_in_memory( Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); elf_checks(buf); - // elf_dump(VERBOSE, buf, 1); Offset_Num headers = elf_section_headers(buf); @@ -2697,15 +2596,13 @@ i64 unit_write_in_memory( c8 *name = elf_name_from_offset(buf, section.name); - if (section.type == SEC_PROGBITS && - section.name.size == sizeof SECTION_GOT - 1 && - mem_eq(name, SECTION_GOT, sizeof SECTION_GOT - 1)) { + if (section.type == SEC_PROGBITS && + STR_EQ(section.name.size, name, SECTION_GOT)) { FAIL("Not implemented", 0); } - if (section.type == SEC_PROGBITS && - section.name.size == sizeof SECTION_PLT - 1 && - mem_eq(name, SECTION_PLT, sizeof SECTION_PLT - 1)) { + if (section.type == SEC_PROGBITS && + STR_EQ(section.name.size, name, SECTION_PLT)) { FAIL("Not implemented", 0); } @@ -2716,64 +2613,51 @@ i64 unit_write_in_memory( FAIL("Not implemented", 0); } - if (section.type == SEC_PROGBITS && section.exec && - section.name.size == sizeof SECTION_INIT - 1 && - mem_eq(name, SECTION_INIT, sizeof SECTION_INIT - 1)) { + if (section.type == SEC_PROGBITS && section.exec && + STR_EQ(section.name.size, name, SECTION_INIT)) { FAIL("Not implemented", 0); } - if (section.type == SEC_PROGBITS && section.exec && - section.name.size == sizeof SECTION_FINI - 1 && - mem_eq(name, SECTION_FINI, sizeof SECTION_FINI - 1)) { + if (section.type == SEC_PROGBITS && section.exec && + STR_EQ(section.name.size, name, SECTION_FINI)) { FAIL("Not implemented", 0); } if (section.exec) { CHECK(!section.write, "Not implemented", 0); - linker->section_offsets[num_sections_total] = rx_code_size; - linker->section_addresses[num_sections_total] = rx_code_size; + linker->sections[num_sections_total].offset = rx_code_size; + linker->sections[num_sections_total].address = rx_code_size; rx_code_size += align(section.data.size, X86_64_ALIGNMENT); continue; } if (section.write && section.type == SEC_NOBITS) { - linker->section_addresses[num_sections_total] = rw_zval_size; + linker->sections[num_sections_total].address = rw_zval_size; rw_zval_size += align(section.data.size, X86_64_ALIGNMENT); continue; } if (section.write) { - linker->section_offsets[num_sections_total] = rw_data_size; - linker->section_addresses[num_sections_total] = rw_data_size; + linker->sections[num_sections_total].offset = rw_data_size; + linker->sections[num_sections_total].address = rw_data_size; rw_data_size += align(section.data.size, X86_64_ALIGNMENT); continue; } - linker->section_offsets[num_sections_total] = ro_data_size; - linker->section_addresses[num_sections_total] = ro_data_size; + linker->sections[num_sections_total].offset = ro_data_size; + linker->sections[num_sections_total].address = ro_data_size; ro_data_size += align(section.data.size, X86_64_ALIGNMENT); } } - rx_code_size = align(rx_code_size, X86_64_PAGE_SIZE); - rw_zval_size = align(rw_zval_size, X86_64_PAGE_SIZE); - rw_data_size = align(rw_data_size, X86_64_PAGE_SIZE); - ro_data_size = align(ro_data_size, X86_64_PAGE_SIZE); - rw_got_size = align(rw_got_size, X86_64_PAGE_SIZE); - rw_dynamic_size = align(rw_dynamic_size, X86_64_PAGE_SIZE); - - i64 rw_zval_address = rx_code_address + rx_code_size; - i64 rw_data_address = rw_zval_address + rw_zval_size; - i64 ro_data_address = rw_data_address + rw_data_size; - i64 rw_got_address = ro_data_address + ro_data_size; - i64 rw_dynamic_address = rw_got_address + rw_got_size; - - i64 rx_code_offset = program_offset; - i64 rw_data_offset = rx_code_offset + rx_code_size; - i64 ro_data_offset = rw_data_offset + rw_data_size; - i64 rw_got_offset = ro_data_offset + ro_data_size; - i64 rw_dynamic_offset = rw_got_offset + rw_got_size; + i64 rw_zval_address = align(rx_code_address + rx_code_size, X86_64_PAGE_SIZE); + i64 rw_data_address = align(rw_zval_address + rw_zval_size, X86_64_PAGE_SIZE); + i64 ro_data_address = align(rw_data_address + rw_data_size, X86_64_PAGE_SIZE); + + i64 rx_code_offset = program_offset; + i64 rw_data_offset = align(rx_code_offset + rx_code_size, X86_64_PAGE_SIZE); + i64 ro_data_offset = align(rw_data_offset + rw_data_size, X86_64_PAGE_SIZE); for (i64 elf_index = 0, sec_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); @@ -2788,16 +2672,16 @@ i64 unit_write_in_memory( continue; if (section.exec) { - linker->section_offsets[sec_index_global] += rx_code_offset + codegen->offset_code; - linker->section_addresses[sec_index_global] += rx_code_address + codegen->offset_code; + linker->sections[sec_index_global].offset += rx_code_offset + codegen->offset_code; + linker->sections[sec_index_global].address += rx_code_address + codegen->offset_code; } else if (section.write && section.type == SEC_NOBITS) { - linker->section_addresses[sec_index_global] += rw_zval_address; + linker->sections[sec_index_global].address += rw_zval_address; } else if (section.write) { - linker->section_offsets[sec_index_global] += rw_data_offset; - linker->section_addresses[sec_index_global] += rw_data_address; + linker->sections[sec_index_global].offset += rw_data_offset; + linker->sections[sec_index_global].address += rw_data_address; } else if (section.data.size > 0) { - linker->section_offsets[sec_index_global] += ro_data_offset + codegen->offset_ro_data; - linker->section_addresses[sec_index_global] += ro_data_address + codegen->offset_ro_data; + linker->sections[sec_index_global].offset += ro_data_offset + codegen->offset_ro_data; + linker->sections[sec_index_global].address += ro_data_address + codegen->offset_ro_data; } } } @@ -2826,19 +2710,18 @@ i64 unit_write_in_memory( if (sym.section == 65522) // common continue; - i64 sym_section = 0; i64 sym_address = sym.value.offset; if (sym.section != 65521 && elf_section(buf, sym.section).alloc) { - sym_section = sec_index_global + sym.section - 1; - CHECK(sym_section < num_sections_total, "Buffer overflow",); - CHECK(linker->section_addresses[sym_section] != 0, "Sanity",); - sym_address = linker->section_addresses[sym_section] + sym.value.offset; + i64 sym_section = sec_index_global + sym.section - 1; + CHECK(sym_section < num_sections_total, "Buffer overflow",); + CHECK(linker->sections[sym_section].address != 0, "Sanity",); + sym_address += linker->sections[sym_section].address; } CHECK(num_symbols < linker->max_num_symbols, "Too many symbols",); - linker->symbols[num_symbols++] = (Symbol_Entry) { + linker->symbols[num_symbols++] = (Link_Sym_Entry) { .name_size = sym.name.size, .name = sym_name, .address = sym_address, @@ -2858,84 +2741,37 @@ i64 unit_write_in_memory( // ========================================================== // - // TODO Add runtime library symbols - // - // __ehdr_start - // _GLOBAL_OFFSET_TABLE_ - // _DYNAMIC - // - // _Unwind_Resume - // _Unwind_Backtrace - // _Unwind_ForcedUnwind - // _Unwind_GetIP - // _Unwind_GetCFA - // - // _init - // _fini - // _end - // _dl_rtld_map - // __pthread_initialize_minimal - // __init_array_start - // __init_array_end - // __fini_array_start - // __fini_array_end - // __rela_iplt_start - // __rela_iplt_end - // __preinit_array_start - // __preinit_array_end - // __start___libc_atexit - // __stop___libc_atexit - // __stop___libc_IO_vtables - // __start___libc_IO_vtables - // __start___libc_subfreeres - // __stop___libc_subfreeres - // __start___libc_freeres_ptrs - // __stop___libc_freeres_ptrs - // __gcc_personality_v0 - // - // __addtf3 - // __subtf3 - // __multf3 - // __divtf3 - // __eqtf2 - // __lttf2 - // __letf2 - // __gttf2 - // __getf2 - // __unordtf2 - - CHECK(num_symbols + 2 <= linker->max_num_symbols, "Too many symbols",); - - linker->symbols[num_symbols++] = (Symbol_Entry) { + // Add runtime library symbols + + CHECK(num_symbols + 3 <= linker->max_num_symbols, "Too many symbols",); + + linker->symbols[num_symbols++] = (Link_Sym_Entry) { .name_size = 12, .name = "__ehdr_start", .address = base_address, .size = ELF_HEADER_SIZE, }; - // Add the dummy _GLOBAL_OFFSET_TABLE_ segment + i64 sym_index_got = num_symbols; - linker->symbols[num_symbols++] = (Symbol_Entry) { + linker->symbols[num_symbols++] = (Link_Sym_Entry) { .name_size = 21, .name = "_GLOBAL_OFFSET_TABLE_", - .address = rw_got_address, - .size = ELF_GOT_ENTRY_SIZE, }; - // Add the dummy _DYNAMIC segment + i64 sym_index_dynamic = num_symbols; - linker->symbols[num_symbols++] = (Symbol_Entry) { - .name_size = 8, - .name = "_DYNAMIC", - .address = rw_dynamic_address, - .size = ELF_DYNAMIC_ENTRY_SIZE, + linker->symbols[num_symbols++] = (Link_Sym_Entry) { + .name_size = 8, + .name = "_DYNAMIC", + .got_offset = 0, }; - // ============================================================== + // ========================================================== // - // Apply relocations + // Process relocations and build global offset table - for (i64 elf_index = 0, sec_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { + for (i64 elf_index = 0, sec_index_global = 0, rel_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); i64 num_sections = elf_section_headers(buf).num; @@ -2951,24 +2787,25 @@ i64 unit_write_in_memory( i64 dst_index_global = sec_index_global + dst_index - 1; CHECK(dst_index_global >= 0 && dst_index_global < linker->max_num_sections, "Buffer overflow",); - for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index) { - Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA); - - c8 *sym_name = elf_name_from_offset(buf, relx.symbol.name); + for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index, ++rel_index_global) { + Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA); + c8 * sym_name = elf_name_from_offset(buf, relx.symbol.name); + i64 sym_index_global = num_symbols; - Symbol_Entry symbol = {0}; + CHECK(num_symbols < linker->max_num_symbols, "Out of memory",); if (relx.symbol.section == 0) { b8 found = 0; for (i64 i = 0; i < pool->num_entities; ++i) - if (pool->entities[i].is_enabled && - pool->entities[i].type == ENTITY_PROC && - pool->entities[i].proc.name_size == relx.symbol.name.size && - mem_eq(pool->entities[i].proc.name, sym_name, relx.symbol.name.size)) { + if (pool->entities[i].is_enabled && + pool->entities[i].type == ENTITY_PROC && + str_eq(pool->entities[i].proc.name_size, + pool->entities[i].proc.name, + relx.symbol.name.size, sym_name)) { CHECK(pool->entities[i].proc.codegen.emit_done, "No proc code",); - symbol = (Symbol_Entry) { + linker->symbols[num_symbols++] = (Link_Sym_Entry) { .address = rx_code_address + pool->entities[i].proc.codegen.offset, .size = relx.symbol.value.size, }; @@ -2979,39 +2816,110 @@ i64 unit_write_in_memory( if (!found) for (i64 i = 0; i < num_symbols; ++i) - if (linker->symbols[i].name_size == relx.symbol.name.size && - mem_eq( - linker->symbols[i].name, - sym_name, - relx.symbol.name.size - )) { - symbol = linker->symbols[i]; - + if (str_eq(linker->symbols[i].name_size, linker->symbols[i].name, relx.symbol.name.size, sym_name)) { + sym_index_global = i; found = 1; break; } - if (!found && - find_str_in_table( - linker->not_found_buffer, - linker->not_found_buffer + not_found_size, - sym_name, - sym_name + relx.symbol.name.size - ) == NULL) { - // FIXME + if (!found) { LOG(WARNING, "Undefined symbol: %s", sym_name); CHECK(not_found_size + relx.symbol.name.size + 1 <= linker->max_not_found_size, "Out of memory",); mem_cpy(linker->not_found_buffer + not_found_size, sym_name, relx.symbol.name.size); not_found_size += relx.symbol.name.size + 1; + + linker->symbols[num_symbols++] = (Link_Sym_Entry) { + .address = 0, + .size = 0, + .name_size = relx.symbol.name.size, + .name = sym_name + }; } } else { + i64 const SEARCH_RANGE = 1024; + i64 src_index_global = sec_index_global + relx.symbol.section - 1; + i64 address = relx.symbol.value.offset + linker->sections[src_index_global].address; - symbol = (Symbol_Entry) { - .address = relx.symbol.value.offset + linker->section_addresses[src_index_global], - .size = relx.symbol.value.size, - }; + b8 found = 0; + + for (i64 k = 1; k <= num_symbols && k <= SEARCH_RANGE; ++k) + if (linker->symbols[num_symbols - k].address == address) { + sym_index_global = num_symbols - k; + found = 1; + break; + } + + if (!found) + linker->symbols[num_symbols++] = (Link_Sym_Entry) { + .address = address, + .size = relx.symbol.value.size, + }; + } + + if (sym_index_global >= num_symbols) + LOG(ERROR, "Symbol: %s", sym_name); + CHECK(sym_index_global < num_symbols, "Symbol not found",); + CHECK(rel_index_global < linker->max_num_rels, "Out of memory",); + + linker->rels[rel_index_global].symbol = sym_index_global; + + switch (relx.type) { + case R_X86_64_GOT32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + if ( + !STR_EQ(relx.symbol.name.size, sym_name, "_DYNAMIC") && + linker->symbols[sym_index_global].got_offset == 0) { + linker->symbols[sym_index_global].got_offset = rw_got_size; + rw_got_size += ELF_GOT_ENTRY_SIZE; + } + break; + + default:; } + } + } + + sec_index_global += num_sections - 1; + } + + i64 rw_got_address = align(ro_data_address + ro_data_size, X86_64_PAGE_SIZE); + i64 rw_dynamic_address = align(rw_got_address + rw_got_size, X86_64_PAGE_SIZE); + + i64 rw_got_offset = align(ro_data_offset + ro_data_size, X86_64_PAGE_SIZE); + i64 rw_dynamic_offset = align(rw_got_offset + rw_got_size, X86_64_PAGE_SIZE); + + linker->symbols[sym_index_got] .address = rw_got_address; + linker->symbols[sym_index_got] .size = rw_got_size; + linker->symbols[sym_index_dynamic].address = rw_dynamic_address; + linker->symbols[sym_index_dynamic].size = rw_dynamic_size; + + // ============================================================== + // + // Apply relocations + + for (i64 elf_index = 0, sec_index_global = 0, rel_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { + Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); + i64 num_sections = elf_section_headers(buf).num; + + Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + Offset_Size symtab = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; + + for (i64 sec_index = 1; sec_index < num_sections; ++sec_index) { + Elf_Section_Header src_sec = elf_section(buf, sec_index); + if (src_sec.type != SEC_REL && src_sec.type != SEC_RELA) + continue; + + i64 dst_index = elf_find_related_section_index(buf, sec_index); + i64 dst_index_global = sec_index_global + dst_index - 1; + CHECK(dst_index_global >= 0 && dst_index_global < linker->max_num_sections, "Buffer overflow",); + + for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index, ++rel_index_global) { + Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA); + c8 * sym_name = elf_name_from_offset(buf, relx.symbol.name); + Link_Sym_Entry symbol = linker->symbols[linker->rels[rel_index_global].symbol]; u8 *dst = buf.begin + elf_section(buf, dst_index).data.offset + relx.offset; @@ -3021,10 +2929,10 @@ i64 unit_write_in_memory( i64 A = relx.addent; // Represents the base address at which a shared object has been loaded into memory during execution. Generally, a shared object is built with a 0 base virtual address, but the execution address will be different. - i64 B = linker->section_addresses[dst_index_global]; + i64 B = linker->sections[dst_index_global].address; // Represents the place (section offset or address) of the storage unit being relocated (computed using r_offset). - i64 P = linker->section_addresses[dst_index_global] + relx.offset; + i64 P = linker->sections[dst_index_global].address + relx.offset; // Represents the value of the symbol whose index resides in the relocation entry. i64 S = symbol.address; @@ -3036,15 +2944,14 @@ i64 unit_write_in_memory( i64 GOT = rw_got_address; // Represents the offset into the global offset table at which the relocation entry's symbol will reside during execution. - i64 G = 8; // TODO + i64 G = symbol.got_offset; // Represents the place (section offset or address) of the Procedure Linkage Table entry for a symbol. i64 L = S; // TODO switch (relx.type) { - #define SKIP_(x) \ - if (relx.symbol.name.size == sizeof(#x) - 1 && \ - mem_eq(sym_name, #x, sizeof(#x) - 1)) \ + #define SKIP_(x) \ + if (str_eq(relx.symbol.name.size, sym_name, sizeof(#x) - 1, #x)) \ break; case R_X86_64_64: @@ -3103,46 +3010,46 @@ i64 unit_write_in_memory( #define TODO_ FAIL("Not implemented", 0) case R_X86_64_NONE: /* Do nothing */ break; - case R_X86_64_64: ADD_(64, S + A); break; // 64, S + A - case R_X86_64_PC32: ADD_(32, S + A - P); break; // 32, S + A - P - case R_X86_64_GOT32: TODO_; break; // 32, G + A - case R_X86_64_PLT32: ADD_(32, L + A - P); break; // 32, L + A - P + case R_X86_64_64: ADD_(64, S + A); break; + case R_X86_64_PC32: ADD_(32, S + A - P); break; + case R_X86_64_GOT32: TODO_; break; + case R_X86_64_PLT32: ADD_(32, L + A - P); break; case R_X86_64_COPY: /* Do nothing */ break; - case R_X86_64_GLOB_DAT: TODO_; break; // 64, S - case R_X86_64_JUMP_SLOT: TODO_; break; // 64, S - case R_X86_64_RELATIVE: TODO_; break; // 64, B + A - case R_X86_64_GOTPCREL: ADD_(32, G + GOT + A - P); break; // 32, G + GOT + A - P - case R_X86_64_32: TODO_; break; // 32, S + A - case R_X86_64_32S: TODO_; break; // 32, S + A - case R_X86_64_16: TODO_; break; // 16, S + A - case R_X86_64_PC16: TODO_; break; // 16, S + A - P - case R_X86_64_8: TODO_; break; // 8, S + A - case R_X86_64_PC8: TODO_; break; // 8, S + A - P + case R_X86_64_GLOB_DAT: ADD_(64, S); break; + case R_X86_64_JUMP_SLOT: ADD_(64, S); break; + case R_X86_64_RELATIVE: ADD_(64, B + A); break; + case R_X86_64_GOTPCREL: ADD_(32, GOT + G + A - P); break; + case R_X86_64_32: ADD_(32, S + A); break; + case R_X86_64_32S: ADD_(32, S + A); break; + case R_X86_64_16: ADD_(16, S + A); break; + case R_X86_64_PC16: ADD_(16, S + A - P); break; + case R_X86_64_8: ADD_(8, S + A); break; + case R_X86_64_PC8: ADD_(8, S + A - P); break; case R_X86_64_DTPMOD64: TODO_; break; case R_X86_64_DTPOFF64: TODO_; break; case R_X86_64_TPOFF64: TODO_; break; case R_X86_64_TLSGD: TODO_; break; case R_X86_64_TLSLD: TODO_; break; case R_X86_64_DTPOFF32: TODO_; break; - case R_X86_64_GOTTPOFF: ADD_(32, S - GOT); break; // 32, S - GOT - case R_X86_64_TPOFF32: ADD_(32, S + A - B); break; // 32, S + A - B - case R_X86_64_PC64: TODO_; break; // 64, S + A - P + case R_X86_64_GOTTPOFF: ADD_(32, S - GOT); break; + case R_X86_64_TPOFF32: ADD_(32, S + A - B); break; + case R_X86_64_PC64: ADD_(64, S + A - P); break; case R_X86_64_GOTOFF64: TODO_; break; - case R_X86_64_GOTPC32: TODO_; break; // 32, GOT + A - P + case R_X86_64_GOTPC32: ADD_(32, GOT + A - P); break; case R_X86_64_GOT64: TODO_; break; case R_X86_64_GOTPCREL64: TODO_; break; - case R_X86_64_GOTPC64: TODO_; break; // 64, GOT + A - P + case R_X86_64_GOTPC64: ADD_(64, GOT + A - P); break; case R_X86_64_GOTPLT64: TODO_; break; case R_X86_64_PLTOFF64: TODO_; break; - case R_X86_64_SIZE32: TODO_; break; // 32, Z + A - case R_X86_64_SIZE64: TODO_; break; // 64, Z + A + case R_X86_64_SIZE32: ADD_(32, Z + A); break; + case R_X86_64_SIZE64: ADD_(64, Z + A); break; case R_X86_64_GOTPC32_TLSDESC: TODO_; break; case R_X86_64_TLSDESC_CALL: TODO_; break; case R_X86_64_TLSDESC: TODO_; break; case R_X86_64_IRELATIVE: TODO_; break; case R_X86_64_RELATIVE64: TODO_; break; - case R_X86_64_GOTPCRELX: TODO_; break; - case R_X86_64_REX_GOTPCRELX: TODO_; break; + case R_X86_64_GOTPCRELX: ADD_(32, GOT + G + A - P); break; + case R_X86_64_REX_GOTPCRELX: ADD_(32, GOT + G + A - P); break; default: FAIL("Unknown relocation type", 0); @@ -3239,10 +3146,12 @@ i64 unit_write_in_memory( i64 entry = rx_code_address + codegen->entry_point; if (!codegen->has_entry) { + // TODO Explicitly declare _start proc + b8 found = 0; - for (i64 sym_index = 0; sym_index < num_symbols; ++sym_index) if (linker->symbols[sym_index].name_size == 6 && - mem_eq(linker->symbols[sym_index].name, "_start", 6)) { + for (i64 sym_index = 0; sym_index < num_symbols; ++sym_index) + if (STR_EQ(linker->symbols[sym_index].name_size, linker->symbols[sym_index].name, "_start")) { entry = linker->symbols[sym_index].address; found = 1; break; @@ -3401,7 +3310,7 @@ i64 unit_write_in_memory( for (i64 sec_index = 1; sec_index < headers.num; ++sec_index, ++sec_index_global) { Elf_Section_Header section = elf_section(buf, sec_index); - i64 offset = linker->section_offsets[sec_index_global]; + i64 offset = linker->sections[sec_index_global].offset; if (offset == 0 || !section.alloc || section.data.size == 0) @@ -3417,6 +3326,16 @@ i64 unit_write_in_memory( write_u64(LE, rw_dynamic_address, o + rw_got_offset, o_end); + for (i64 i = 0, offset = 0; i < num_symbols; ++i) { + Link_Sym_Entry *sym = linker->symbols + i; + if (sym->got_offset == 0) + continue; + + offset += ELF_GOT_ENTRY_SIZE; + CHECK(offset < rw_got_size, "Sanity",); + write_u64(LE, sym->address, o + rw_got_offset + offset, o_end); + } + // ============================================================== return output_size; @@ -3552,13 +3471,13 @@ void unit_write( mem_set(codegen->rels, 0, codegen->max_num_rels * sizeof *codegen->rels); mem_set(codegen->buffer_code, 0, codegen->max_code_size); - mem_set(codegen->buffer_ro_data, 0, codegen->max_ro_data_size); + mem_set(codegen->buffer_ro_data, 0, codegen->max_ro_data_size); mem_set(linker->obj_file_buffer, 0, linker->max_obj_file_size); mem_set(linker->dependencies_buffer, 0, linker->max_dependencies_size); mem_set(linker->obj_file_offsets, 0, linker->max_num_obj_files * sizeof *linker->obj_file_offsets); - mem_set(linker->section_offsets, 0, linker->max_num_sections * sizeof *linker->section_offsets); - mem_set(linker->section_addresses, 0, linker->max_num_sections * sizeof *linker->section_addresses); + mem_set(linker->sections, 0, linker->max_num_sections * sizeof *linker->sections); mem_set(linker->symbols, 0, linker->max_num_symbols * sizeof *linker->symbols); + mem_set(linker->rels, 0, linker->max_num_rels * sizeof *linker->rels); mem_set(linker->not_found_buffer, 0, linker->max_not_found_size); mem_set(linker->output_buffer, 0, linker->max_output_size); } @@ -3830,19 +3749,20 @@ Linker_Context g_linker = { .max_num_obj_files = MAX_NUM_OBJECT_FILES, .max_num_sections = MAX_NUM_SECTIONS, .max_num_symbols = MAX_NUM_SYMBOLS, + .max_num_rels = MAX_NUM_LINK_RELS, .max_not_found_size = MAX_NOT_FOUND_SIZE, .max_output_size = MAX_OUTPUT_SIZE, .num_obj_files = 0, - .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0}, - .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, - .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, - .section_offsets = (i64[MAX_NUM_SECTIONS]) {0}, - .section_addresses = (i64[MAX_NUM_SECTIONS]) {0}, - .symbols = (Symbol_Entry[MAX_NUM_SYMBOLS]) {0}, - .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0}, - .output_buffer = (u8[MAX_OUTPUT_SIZE]) {0}, + .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0}, + .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, + .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, + .sections = (Link_Sec_Entry[MAX_NUM_SECTIONS]) {0}, + .symbols = (Link_Sym_Entry[MAX_NUM_SYMBOLS]) {0}, + .rels = (Link_Rel_Entry[MAX_NUM_LINK_RELS]) {0}, + .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0}, + .output_buffer = (u8[MAX_OUTPUT_SIZE]) {0}, }; // Handy procedures @@ -4031,34 +3951,13 @@ b8 link_with_libc(void) { ); } - // // Add the entry point. - // i64 entry = p_new_entry(u); - // { - // // Initialize libc - // N_CALL_BY_NAME( - // entry, - // "__libc_start_main", - // n_ref(entry, mainproc), // main - // n_i32(entry, 0), // argc - // n_ref(entry, n_ptr(entry, 0)), // argv - // n_ptr(entry, 0), // init - // n_ptr(entry, 0), // fini - // n_ptr(entry, 0), // rtld_fini - // n_ptr(entry, 0) // stack_end - // ); - - // // Return - // n_ret(entry, 0, NULL); - // } - // ============================================================ // // Compile and link - // Add a static library. + // Add dependencies l_static(u, "c"); l_object(u, "crt1"); - // l_static(u, "test"); // Write the compilation unit into an executable file. u_elf_x86_64(u, "test_foo"); |