diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2024-08-01 13:00:12 +0200 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2024-08-01 13:00:12 +0200 |
commit | 168551fbd44a3d8c64cd362511964566cf74091c (patch) | |
tree | 6fe0c0836dfd8a9244c8704a67140ac3b95ad8e2 /bxgen.c | |
parent | 1f3b03cb4eb15bd8eab8c48edf04fd9f3c7c1cb2 (diff) | |
download | bxgen-168551fbd44a3d8c64cd362511964566cf74091c.zip |
Precalculate GOT segment size
Diffstat (limited to 'bxgen.c')
-rwxr-xr-x | bxgen.c | 329 |
1 file changed, 237 insertions, 92 deletions
@@ -201,7 +201,6 @@ enum { MAX_NUM_LINK_RELS = 2 * 1024 * 1024, MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB - MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB MAX_CODE_SIZE = 100 * 1024, // 100 KB MAX_OUTPUT_SIZE = 20 * 1024 * 1024, // 20 MB @@ -441,6 +440,8 @@ typedef struct { i64 address; i64 size; i64 got_offset; + i64 obj_index; + i64 sec_index; } Link_Sym_Entry; typedef struct { @@ -482,7 +483,6 @@ typedef struct { i64 max_num_sections; i64 max_num_symbols; i64 max_num_rels; - i64 max_not_found_size; i64 max_output_size; i64 num_obj_files; @@ -493,7 +493,6 @@ typedef struct { Link_Sec_Entry *sections; Link_Sym_Entry *symbols; Link_Rel_Entry *rels; - c8 * not_found_buffer; u8 * output_buffer; } Linker_Context; @@ -1753,6 +1752,7 @@ typedef struct { u8 * begin; u8 * end; Offset_Size elf; + i64 obj_index; } Buffer_Context; typedef struct { @@ -1779,7 +1779,7 @@ typedef struct { u8 bind; i64 section; Offset_Size value; - i64 _index; + i64 obj_index; } Elf_Symbol_Entry; typedef struct { @@ -2232,6 +2232,7 @@ Buffer_Context elf_buffer_context( .offset = linker->obj_file_offsets[elf_index], .size = linker->obj_file_offsets[elf_index + 1] - linker->obj_file_offsets[elf_index], }, + .obj_index = elf_index, }; } @@ -2459,7 +2460,7 @@ Elf_Symbol_Entry elf_symbol( .offset = sym_value, .size = sym_size, }, - ._index = symbol_index, + .obj_index = b.obj_index, }; } @@ -2570,14 +2571,13 @@ i64 unit_write_in_memory( i64 ro_data_size = codegen->offset_ro_data; i64 rw_data_size = 0; i64 rw_zval_size = 0; - i64 rw_tls_data_size = 0; - i64 rw_tls_zval_size = 0; i64 rw_got_size = ELF_GOT_ENTRY_SIZE; i64 rw_dynamic_size = ELF_DYNAMIC_ENTRY_SIZE; + i64 rw_tls_data_size = 0; + i64 rw_tls_zval_size = 0; i64 num_sections_total = 0; i64 num_symbols = 0; - i64 not_found_size = 0; // ========================================================== // @@ -2658,16 +2658,98 @@ i64 unit_write_in_memory( } } + // 
========================================================== + // + // Calculate global offset table size + { + i64 prev_num_symbols = num_symbols; + + for (i64 elf_index = 0; elf_index < linker->num_obj_files; ++elf_index) { + Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); + i64 num_sections = elf_section_headers(buf).num; + + Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + Offset_Size symtab = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; + + for (i64 sec_index = 1; sec_index < num_sections; ++sec_index) { + Elf_Section_Header src_sec = elf_section(buf, sec_index); + if (src_sec.type != SEC_REL && src_sec.type != SEC_RELA) + continue; + + for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index) { + Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA); + c8 * sym_name = elf_name_from_offset(buf, relx.symbol.name); + + CHECK(relx.symbol.section != 65521, "Sanity",); + CHECK(relx.symbol.section != 65522, "Sanity",); + + switch (relx.type) { + case R_X86_64_GOT32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + if (!STR_EQ(relx.symbol.name.size, sym_name, "_DYNAMIC")) { + b8 found = 0; + + for (i64 i = 0; i < num_symbols; ++i) + if (relx.symbol.name.size > 0) { + if (str_eq(linker->symbols[i].name_size, linker->symbols[i].name, relx.symbol.name.size, sym_name)) { + found = 1; + break; + } + } else { + if (linker->symbols[i].obj_index == relx.symbol.obj_index && + linker->symbols[i].sec_index == relx.symbol.section && + linker->symbols[i].address == relx.symbol.value.offset) { + found = 1; + break; + } + } + + if (found) + break; + + CHECK(num_symbols < linker->max_num_symbols, "Out of memory",); + linker->symbols[num_symbols++] = (Link_Sym_Entry) { + .name_size = relx.symbol.name.size, + .name = sym_name, + .address = 
relx.symbol.value.offset, + .obj_index = relx.symbol.obj_index, + .sec_index = relx.symbol.section, + }; + rw_got_size += ELF_GOT_ENTRY_SIZE; + } + break; + + default:; + } + } + } + } + + num_symbols = prev_num_symbols; + } + + // ========================================================== + // + // Adjust section offsets + i64 ro_data_address = align(rx_code_address + rx_code_size, X86_64_PAGE_SIZE); i64 rw_data_address = align(ro_data_address + ro_data_size, X86_64_PAGE_SIZE); i64 rw_zval_address = align(rw_data_address + rw_data_size, X86_64_PAGE_SIZE); - i64 rw_tls_data_address = align(rw_zval_address + rw_zval_size, X86_64_PAGE_SIZE); + i64 rw_got_address = align(rw_zval_address + rw_zval_size, X86_64_PAGE_SIZE); + i64 rw_dynamic_address = align(rw_got_address + rw_got_size, X86_64_PAGE_SIZE); + i64 rw_tls_data_address = align(rw_dynamic_address + rw_dynamic_size, X86_64_PAGE_SIZE); i64 rw_tls_zval_address = align(rw_tls_data_address + rw_tls_data_size, X86_64_PAGE_SIZE); i64 rx_code_offset = program_offset; - i64 ro_data_offset = align(rx_code_offset + rx_code_size, X86_64_PAGE_SIZE); - i64 rw_data_offset = align(ro_data_offset + ro_data_size, X86_64_PAGE_SIZE); - i64 rw_tls_data_offset = align(rw_data_offset + rw_data_size, X86_64_PAGE_SIZE); + i64 ro_data_offset = align(rx_code_offset + rx_code_size, X86_64_PAGE_SIZE); + i64 rw_data_offset = align(ro_data_offset + ro_data_size, X86_64_PAGE_SIZE); + i64 rw_got_offset = align(rw_data_offset + rw_data_size, X86_64_PAGE_SIZE); + i64 rw_dynamic_offset = align(rw_got_offset + rw_got_size, X86_64_PAGE_SIZE); + i64 rw_tls_data_offset = align(rw_dynamic_offset + rw_dynamic_size, X86_64_PAGE_SIZE); + + // ---------------------------------------------------------- for (i64 elf_index = 0, sec_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); @@ -2795,37 +2877,121 @@ i64 unit_write_in_memory( // 
========================================================== // + // TODO Add internal symbols + + // ========================================================== + // // Add runtime library symbols - CHECK(num_symbols + 3 <= linker->max_num_symbols, "Too many symbols",); + #define ADD_UNIQUE_(i_, name_, ...) \ + do { \ + b8 found_ = 0; \ + for (i_ = 0; i_ < num_symbols; ++i_) \ + if (STR_EQ(linker->symbols[i_].name_size, \ + linker->symbols[i_].name, \ + name_)) { \ + found_ = 1; \ + break; \ + } \ + CHECK(!found_, "Forbidden symbol: " name_,); \ + CHECK(num_symbols < linker->max_num_symbols, "Too many symbols",); \ + linker->symbols[num_symbols++] = (Link_Sym_Entry) { \ + .name_size = sizeof name_ - 1, \ + .name = name_, \ + __VA_ARGS__ \ + }; \ + } while (0) - linker->symbols[num_symbols++] = (Link_Sym_Entry) { - .name_size = 12, - .name = "__ehdr_start", - .address = base_address, - .size = ELF_HEADER_SIZE, - }; + #define ADD_IF_MISSING_(name_, ...) \ + do { \ + b8 found_ = 0; \ + for (i_ = 0; i_ < num_symbols; ++i_) \ + if (STR_EQ(linker->symbols[i_].name_size, \ + linker->symbols[i_].name, \ + name_)) { \ + found_ = 1; \ + break; \ + } \ + if (!found_) { \ + CHECK(num_symbols < linker->max_num_symbols, "Too many symbols",); \ + linker->symbols[num_symbols++] = (Link_Sym_Entry) { \ + .name_size = sizeof name_ - 1, \ + .name = name_, \ + __VA_ARGS__ \ + }; \ + } \ + } while (0) - i64 sym_index_got = num_symbols; + // ---------------------------------------------------------- - linker->symbols[num_symbols++] = (Link_Sym_Entry) { - .name_size = 21, - .name = "_GLOBAL_OFFSET_TABLE_", - }; + i64 sym_index_got; + i64 sym_index_dynamic; - i64 sym_index_dynamic = num_symbols; + ADD_UNIQUE_(sym_index_got, "_GLOBAL_OFFSET_TABLE_", .address = rw_got_address, .size = rw_got_size); + ADD_UNIQUE_(sym_index_dynamic, "_DYNAMIC", .address = rw_dynamic_address, .size = rw_dynamic_size, .got_offset = 0); - linker->symbols[num_symbols++] = (Link_Sym_Entry) { - .name_size = 8, - .name 
= "_DYNAMIC", - .got_offset = 0, - }; + { + i64 i_; + + ADD_IF_MISSING_("__ehdr_start", .address = base_address, .size = ELF_HEADER_SIZE); + + ADD_IF_MISSING_("__pthread_initialize_minimal",); + + ADD_IF_MISSING_("__preinit_array_start",); + ADD_IF_MISSING_("__preinit_array_end",); + ADD_IF_MISSING_("__init_array_start",); + ADD_IF_MISSING_("__init_array_end",); + ADD_IF_MISSING_("__fini_array_start",); + ADD_IF_MISSING_("__fini_array_end",); + ADD_IF_MISSING_("__rela_iplt_start",); + ADD_IF_MISSING_("__rela_iplt_end",); + ADD_IF_MISSING_("__start___libc_atexit",); + ADD_IF_MISSING_("__stop___libc_atexit",); + ADD_IF_MISSING_("__start___libc_IO_vtables",); + ADD_IF_MISSING_("__stop___libc_IO_vtables",); + ADD_IF_MISSING_("__start___libc_subfreeres",); + ADD_IF_MISSING_("__stop___libc_subfreeres",); + ADD_IF_MISSING_("__start___libc_freeres_ptrs",); + ADD_IF_MISSING_("__stop___libc_freeres_ptrs",); + + ADD_IF_MISSING_("_end",); + ADD_IF_MISSING_("_dl_rtld_map",); + ADD_IF_MISSING_("__gmon_start__",); + + ADD_IF_MISSING_("__gcc_personality_v0",); + ADD_IF_MISSING_("_Unwind_Resume",); + ADD_IF_MISSING_("_Unwind_ForcedUnwind",); + ADD_IF_MISSING_("_Unwind_Backtrace",); + ADD_IF_MISSING_("_Unwind_GetCFA",); + ADD_IF_MISSING_("_Unwind_GetIP",); + + ADD_IF_MISSING_("__addtf3",); + ADD_IF_MISSING_("__subtf3",); + ADD_IF_MISSING_("__multf3",); + ADD_IF_MISSING_("__divtf3",); + ADD_IF_MISSING_("__eqtf2",); + ADD_IF_MISSING_("__letf2",); + ADD_IF_MISSING_("__lttf2",); + ADD_IF_MISSING_("__getf2",); + ADD_IF_MISSING_("__gttf2",); + ADD_IF_MISSING_("__unordtf2",); + } + + #undef ADD_UNIQUE_ + #undef ADD_IF_MISSING_ + + // ========================================================== + // + // TODO Resolve internal symbols // ========================================================== // // Process relocations and build global offset table - for (i64 elf_index = 0, sec_index_global = 0, rel_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) { + for (i64 elf_index = 0, 
sec_index_global = 0, + rel_index_global = 0, got_offset = 0; + elf_index < linker->num_obj_files; + ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index); i64 num_sections = elf_section_headers(buf).num; @@ -2880,17 +3046,8 @@ i64 unit_write_in_memory( } if (!found) { - LOG(WARNING, "Undefined symbol: %s", sym_name); - CHECK(not_found_size + relx.symbol.name.size + 1 <= linker->max_not_found_size, "Out of memory",); - mem_cpy(linker->not_found_buffer + not_found_size, sym_name, relx.symbol.name.size); - not_found_size += relx.symbol.name.size + 1; - - linker->symbols[num_symbols++] = (Link_Sym_Entry) { - .address = 0, - .size = 0, - .name_size = relx.symbol.name.size, - .name = sym_name - }; + LOG(ERROR, "Undefined symbol: %s", sym_name); + FAIL("Link failed", 0); } } else { i64 const SEARCH_RANGE = 1024; @@ -2926,11 +3083,11 @@ i64 unit_write_in_memory( case R_X86_64_GOTPCREL: case R_X86_64_GOTPCRELX: case R_X86_64_REX_GOTPCRELX: - if ( - !STR_EQ(relx.symbol.name.size, sym_name, "_DYNAMIC") && - linker->symbols[sym_index_global].got_offset == 0) { - linker->symbols[sym_index_global].got_offset = rw_got_size; - rw_got_size += ELF_GOT_ENTRY_SIZE; + if (!STR_EQ(relx.symbol.name.size, sym_name, "_DYNAMIC") && + linker->symbols[sym_index_global].got_offset == 0) { + got_offset += ELF_GOT_ENTRY_SIZE; + CHECK(got_offset < rw_got_size, "Sanity",); + linker->symbols[sym_index_global].got_offset = got_offset; } break; @@ -2942,17 +3099,6 @@ i64 unit_write_in_memory( sec_index_global += num_sections - 1; } - i64 rw_got_address = align(rw_tls_zval_address + rw_tls_zval_size, X86_64_PAGE_SIZE); - i64 rw_dynamic_address = align(rw_got_address + rw_got_size, X86_64_PAGE_SIZE); - - i64 rw_got_offset = align(rw_tls_data_offset + rw_tls_data_size, X86_64_PAGE_SIZE); - i64 rw_dynamic_offset = align(rw_got_offset + rw_got_size, X86_64_PAGE_SIZE); - - linker->symbols[sym_index_got] .address = rw_got_address; - 
linker->symbols[sym_index_got] .size = rw_got_size; - linker->symbols[sym_index_dynamic].address = rw_dynamic_address; - linker->symbols[sym_index_dynamic].size = rw_dynamic_size; - // ============================================================== // // Apply relocations @@ -3121,7 +3267,7 @@ i64 unit_write_in_memory( // ============================================================== // - // Apply our relocations + // Apply internal relocations #define FIND_(x) \ do { for (i64 i = 0; i < num_symbols; ++i) \ @@ -3166,9 +3312,10 @@ i64 unit_write_in_memory( CHECK(rel.name_size > 0 && rel.name != NULL, "No proc name", 0); for (i64 i = 0; i < num_symbols; ++i) - if (linker->symbols[i].address != 0 && - linker->symbols[i].name_size == rel.name_size && - mem_eq(linker->symbols[i].name, rel.name, rel.name_size)) { + if (linker->symbols[i].address != 0 && + str_eq(linker->symbols[i].name_size, + linker->symbols[i].name, + rel.name_size, rel.name)) { i64 value = rel.value + linker->symbols[i].address; write_i64(LE, value, begin, end); found = 1; @@ -3202,7 +3349,7 @@ i64 unit_write_in_memory( // Writing the ELF executable // - i64 output_size = align(rw_dynamic_offset + rw_dynamic_size, X86_64_ALIGNMENT); + i64 output_size = align(rw_tls_data_offset + rw_tls_data_size, X86_64_PAGE_SIZE); CHECK(output_size <= linker->max_output_size, "Out of memory",); i64 entry = rx_code_address + codegen->entry_point; @@ -3229,9 +3376,9 @@ i64 unit_write_in_memory( LOG(VERBOSE, "Total size"); LOG(VERBOSE, "r/x code - %7lld bytes", rx_code_size); - LOG(VERBOSE, "r/w zval - %7lld bytes", rw_zval_size); - LOG(VERBOSE, "r/w data - %7lld bytes", rw_data_size); LOG(VERBOSE, "r/o data - %7lld bytes", ro_data_size); + LOG(VERBOSE, "r/w data - %7lld bytes", rw_data_size); + LOG(VERBOSE, "r/w zval - %7lld bytes", rw_zval_size); LOG(VERBOSE, "r/w TLS data - %7lld bytes", rw_tls_data_size); LOG(VERBOSE, "r/w TLS zval - %7lld bytes", rw_tls_zval_size); LOG(VERBOSE, "r/w GOT - %7lld bytes", rw_got_size); 
@@ -3274,9 +3421,9 @@ i64 unit_write_in_memory( CHECK(rx_code_offset % X86_64_PAGE_SIZE == rx_code_address % X86_64_PAGE_SIZE, "Invalid alignment",); CHECK(rw_data_offset % X86_64_PAGE_SIZE == rw_data_address % X86_64_PAGE_SIZE, "Invalid alignment",); CHECK(ro_data_offset % X86_64_PAGE_SIZE == ro_data_address % X86_64_PAGE_SIZE, "Invalid alignment",); - CHECK(rw_tls_data_offset % X86_64_PAGE_SIZE == rw_tls_data_address % X86_64_PAGE_SIZE, "Invalid alignment",); CHECK(rw_got_offset % X86_64_PAGE_SIZE == rw_got_address % X86_64_PAGE_SIZE, "Invalid alignemtn",); CHECK(rw_dynamic_offset % X86_64_PAGE_SIZE == rw_dynamic_address % X86_64_PAGE_SIZE, "Invalid alignemtn",); + CHECK(rw_tls_data_offset % X86_64_PAGE_SIZE == rw_tls_data_address % X86_64_PAGE_SIZE, "Invalid alignment",); u8 *h = o + ELF_HEADER_SIZE; @@ -3335,8 +3482,31 @@ i64 unit_write_in_memory( write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); h += ELF_PROGRAM_HEADER_SIZE; + // r/w GOT + write_u32(LE, 1, h, o_end); // type (PT_LOAD) + write_u32(LE, 6, h + 4, o_end); // flags (PF_R | PF_W) + write_i64(LE, rw_got_offset, h + 8, o_end); + write_i64(LE, rw_got_address, h + 16, o_end); // virtual address + write_i64(LE, rw_got_address, h + 24, o_end); // phisical address + write_i64(LE, rw_got_size, h + 32, o_end); // size in file + write_i64(LE, rw_got_size, h + 40, o_end); // size in memory + write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); + h += ELF_PROGRAM_HEADER_SIZE; + + // r/w dynamic + write_u32(LE, 2, h, o_end); // type (PT_DYNAMIC) + write_u32(LE, 6, h + 4, o_end); // flags (PF_R | PF_W) + write_i64(LE, rw_dynamic_offset, h + 8, o_end); + write_i64(LE, rw_dynamic_address, h + 16, o_end); // virtual address + write_i64(LE, rw_dynamic_address, h + 24, o_end); // phisical address + write_i64(LE, rw_dynamic_size, h + 32, o_end); // size in file + write_i64(LE, rw_dynamic_size, h + 40, o_end); // size in memory + write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); + h += ELF_PROGRAM_HEADER_SIZE; + // TLS segment - 
i64 tls_size_total = rw_tls_zval_size + rw_tls_data_size; + i64 tls_size_total = align(rw_tls_data_size, X86_64_PAGE_SIZE) + rw_tls_zval_size; + write_u32(LE, 1, h, o_end); // type (PT_LOAD) write_u32(LE, 6, h + 4, o_end); // flags (PF_R | PF_W) write_i64(LE, rw_tls_data_offset, h + 8, o_end); @@ -3369,28 +3539,6 @@ i64 unit_write_in_memory( write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); h += ELF_PROGRAM_HEADER_SIZE; - // r/w GOT - write_u32(LE, 1, h, o_end); // type (PT_LOAD) - write_u32(LE, 6, h + 4, o_end); // flags (PF_R | PF_W) - write_i64(LE, rw_got_offset, h + 8, o_end); - write_i64(LE, rw_got_address, h + 16, o_end); // virtual address - write_i64(LE, rw_got_address, h + 24, o_end); // phisical address - write_i64(LE, rw_got_size, h + 32, o_end); // size in file - write_i64(LE, rw_got_size, h + 40, o_end); // size in memory - write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); - h += ELF_PROGRAM_HEADER_SIZE; - - // r/w dynamic - write_u32(LE, 2, h, o_end); // type (PT_DYNAMIC) - write_u32(LE, 6, h + 4, o_end); // flags (PF_R | PF_W) - write_i64(LE, rw_dynamic_offset, h + 8, o_end); - write_i64(LE, rw_dynamic_address, h + 16, o_end); // virtual address - write_i64(LE, rw_dynamic_address, h + 24, o_end); // phisical address - write_i64(LE, rw_dynamic_size, h + 32, o_end); // size in file - write_i64(LE, rw_dynamic_size, h + 40, o_end); // size in memory - write_i64(LE, X86_64_ALIGNMENT, h + 48, o_end); - h += ELF_PROGRAM_HEADER_SIZE; - CHECK(h == o + ELF_HEADER_SIZE + num_program_headers * ELF_PROGRAM_HEADER_SIZE, "Invalid num program headers",); CHECK(rx_code_offset >= h - o, "Sanity",); @@ -3578,7 +3726,6 @@ void unit_write( mem_set(linker->sections, 0, linker->max_num_sections * sizeof *linker->sections); mem_set(linker->symbols, 0, linker->max_num_symbols * sizeof *linker->symbols); mem_set(linker->rels, 0, linker->max_num_rels * sizeof *linker->rels); - mem_set(linker->not_found_buffer, 0, linker->max_not_found_size); mem_set(linker->output_buffer, 0, 
linker->max_output_size); } @@ -3850,7 +3997,6 @@ Linker_Context g_linker = { .max_num_sections = MAX_NUM_SECTIONS, .max_num_symbols = MAX_NUM_SYMBOLS, .max_num_rels = MAX_NUM_LINK_RELS, - .max_not_found_size = MAX_NOT_FOUND_SIZE, .max_output_size = MAX_OUTPUT_SIZE, .num_obj_files = 0, @@ -3861,7 +4007,6 @@ Linker_Context g_linker = { .sections = (Link_Sec_Entry[MAX_NUM_SECTIONS]) {0}, .symbols = (Link_Sym_Entry[MAX_NUM_SYMBOLS]) {0}, .rels = (Link_Rel_Entry[MAX_NUM_LINK_RELS]) {0}, - .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0}, .output_buffer = (u8[MAX_OUTPUT_SIZE]) {0}, }; |