summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mitya Selivanov <automainint@guattari.tech> 2024-08-01 06:47:59 +0200
committer Mitya Selivanov <automainint@guattari.tech> 2024-08-01 06:47:59 +0200
commit 4bc7fe83b94e400ac190a6140ce9792a1c1ba106 (patch)
tree 7cd1464174d57361f0ecbf06f90f54eff37b5192
parent de0850e7f8e62e4b3873e63aee10d9253b006c5f (diff)
download bxgen-4bc7fe83b94e400ac190a6140ce9792a1c1ba106.zip
Global offset table
-rwxr-xr-x bxgen.c 631
1 files changed, 265 insertions, 366 deletions
diff --git a/bxgen.c b/bxgen.c
index f6d0da8..b26ded0 100755
--- a/bxgen.c
+++ b/bxgen.c
@@ -27,9 +27,11 @@
#/
#/ To-Do list
#/
+#/ - Simplify
+#/ - Use 0 for UNDEFINED. Make the zero value useful
+#/ - Factor out checks
#/ - Library
#/ - Terminal color option
-#/ - Use 0 for UNDEFINED. Make the zero value useful
#/ - String table for names and arrays
#/ - Proper prefixes for identifiers
#/ - Effective entity allocation
@@ -84,8 +86,7 @@
#/
#/ Bugs
#/
-#/ - Segfault at 0x58fe2d (after 0x401c3f, 0x58fe2d)
-#/ - in 0x58fabc
+#/ - Segfault at 0x45bb82
#/
#/ Done features
#/
@@ -195,6 +196,7 @@ enum {
MAX_NUM_SECTIONS = 2 * 1024 * 1024,
MAX_NUM_SYMBOLS = 2 * 1024 * 1024,
MAX_NUM_RELS = 100 * 1024,
+ MAX_NUM_LINK_RELS = 2 * 1024 * 1024,
MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB
MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB
MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB
@@ -433,11 +435,21 @@ typedef struct {
} Rel_Entry;
typedef struct { // Per-section linker record; takes over from the separate section_offsets / section_addresses arrays.
+ i64 offset; // Starts as the section's position inside its segment group, then the segment's offset is added; never assigned for writable NOBITS sections.
+ i64 address; // Starts as the section's position inside its segment group, then the segment's base address is added.
+} Link_Sec_Entry;
+
+typedef struct { // Per-symbol linker record.
i64 name_size; // Length of `name` in bytes.
c8 *name; // Symbol name; left zero for entries created without a name.
i64 address; // Resolved address of the symbol.
i64 size; // Size of the symbol.
-} Symbol_Entry;
+ i64 got_offset; // Byte offset of this symbol's slot in the global offset table; 0 means no slot has been assigned.
+} Link_Sym_Entry;
+
+typedef struct { // Per-relocation linker record, one per relocation entry.
+ i64 symbol; // Index into Linker_Context.symbols of the symbol resolved for this relocation.
+} Link_Rel_Entry;
// Pool, a collection of all entities.
//
@@ -473,19 +485,20 @@ typedef struct {
i64 max_num_obj_files;
i64 max_num_sections;
i64 max_num_symbols;
+ i64 max_num_rels;
i64 max_not_found_size;
i64 max_output_size;
i64 num_obj_files;
- u8 * obj_file_buffer;
- u8 * dependencies_buffer;
- i64 * obj_file_offsets;
- i64 * section_offsets;
- i64 * section_addresses;
- Symbol_Entry *symbols;
- c8 * not_found_buffer;
- u8 * output_buffer;
+ u8 * obj_file_buffer;
+ u8 * dependencies_buffer;
+ i64 * obj_file_offsets;
+ Link_Sec_Entry *sections;
+ Link_Sym_Entry *symbols;
+ Link_Rel_Entry *rels;
+ c8 * not_found_buffer;
+ u8 * output_buffer;
} Linker_Context;
// ================================================================
@@ -695,6 +708,13 @@ b8 mem_eq(void *a, void *b, i64 size) {
return 1;
}
+// Length-aware string comparison. Returns 1 when both byte ranges have the
+// same length and mem_eq reports identical contents, and 0 otherwise.
+// mem_eq is only consulted when the lengths already match.
+b8 str_eq(i64 a_len, c8 *a, i64 b_len, c8 *b) {
+  if (a_len != b_len)
+    return 0;
+  if (!mem_eq(a, b, a_len))
+    return 0;
+  return 1;
+}
+
+// Compares the string `a` of length `z` with `b`.
+// `b` must be a string literal or a char array: its length is taken from
+// sizeof(b) - 1, which would be wrong for a plain pointer. `b` is expanded
+// more than once, so it must not have side effects.
+// Every parameter is parenthesized so that an argument containing a
+// lower-precedence operator (e.g. a conditional expression) still parses
+// as a single operand.
+#define STR_EQ(z, a, b) \
+  ((z) == sizeof(b) - 1 && mem_eq((a), (b), sizeof(b) - 1))
+
i64 str_len(c8 *s, c8 *s_end) {
CHECK(s < s_end, "Buffer overflow", 0);
@@ -751,7 +771,7 @@ c8 *find_str_in_table(c8 *buf, c8 *buf_end, c8 *sub, c8 *sub_end) {
while (buf < buf_end) {
i64 len = str_len(buf, buf_end);
- if (sub_end - sub == len && mem_eq(buf, sub, len))
+ if (str_eq(len, buf, sub_end - sub, sub))
return buf;
buf += len + 1;
}
@@ -2050,7 +2070,7 @@ void x86_64_emit_node(
write_u8(LE, 0xc4, begin + 84, end); //
write_u8(LE, 16, begin + 85, end); // 16
- codegen->offset_code += 86;
+ codegen->offset_code += 86;
codegen->offset_ro_data += dat_2->lit.num_bytes;
} break;
@@ -2294,8 +2314,7 @@ i64 elf_find_section_index_by_name(
i64 name_index = (i64) read_u32(LE, begin, b.end);
Offset_Size s = elf_name_in_string_table(b, names, name_index);
- if (s.size == name_size &&
- mem_eq(b.begin + s.offset, name, name_size))
+ if (str_eq(s.size, (c8 *) b.begin + s.offset, name_size, name))
return i;
}
@@ -2488,7 +2507,7 @@ Elf_Symbol_Entry elf_find_symbol_by_name(
CHECK(b.begin + sym.name.offset + name_size <= b.end, "Buffer overflow", (Elf_Symbol_Entry) {0});
CHECK(sym.name.offset + name_size <= b.elf.size, "Buffer overflow", (Elf_Symbol_Entry) {0});
- if (name_size == sym.name.size && mem_eq(name, b.begin + sym.name.offset, name_size))
+ if (str_eq(name_size, name, sym.name.size, (c8 *) b.begin + sym.name.offset))
return sym;
}
@@ -2501,146 +2520,27 @@ void elf_checks(Buffer_Context b) {
u8 osabi = read_u8(LE, begin + 7, end);
- CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",);
- CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",);
- CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",);
- CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",);
-
- CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",);
- CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",);
- CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",);
- CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",);
- CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",);
- CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",);
- CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",);
- CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",);
-
- LAX( read_u64(LE, begin + 24, end) == 0, "Invalid entry point");
- LAX( read_u64(LE, begin + 32, end) == 0, "Invalid program header offset");
- LAX( read_u32(LE, begin + 48, end) == 0, "Invalid flags");
- LAX( read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size");
- LAX( read_u16(LE, begin + 54, end) == 0, "Invalid program header size");
- LAX( read_u16(LE, begin + 56, end) == 0, "Invalid num program headers");
- LAX( read_u16(LE, begin + 58, end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size");
-}
-
-void elf_dump(u32 log_level, Buffer_Context b, b8 term_color) {
- Offset_Num headers = elf_section_headers(b);
- Offset_Size strtab = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
- Offset_Size symtab = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data;
-
- for (i64 sec_index = 1; sec_index < headers.num; ++sec_index) {
- Elf_Section_Header section = elf_section(b, sec_index);
-
- c8 *name = elf_name_from_offset(b, section.name);
-
- LOG(
- log_level,
- "\"%s%s%s\"%*s%-14s%s%s%s%s%lld%s",
- !term_color ? "" :
- section.type == SEC_SYMTAB ||
- section.type == SEC_RELA ||
- section.type == SEC_REL ? "\x1b[32m" :
- section.alloc ? "\x1b[34m" :
- section.type == SEC_STRTAB ? "\x1b[33m" :
- "\x1b[31m",
- name,
- !term_color ? "" : "\x1b[37m",
- (i32) (section.name.size < 30 ? 30 - section.name.size : 1),
- "",
- SEC_TYPE_NAMES[section.type],
- section.alloc ? "R" : "_",
- section.write ? "W" : "_",
- section.exec ? "X" : "_",
- section.data.size > 0 ? " - " : "",
- section.data.size,
- section.data.size > 0 ? " bytes" : "\b "
- );
-
- switch (section.type) {
- case SEC_SYMTAB:
- LOG(log_level, " - -");
-
- for (i64 sym_index = 1; sym_index < section.num_entries; ++sym_index) {
- Elf_Symbol_Entry sym = elf_symbol(b, section.data, strtab, (u16) sym_index);
-
- c8 *name = elf_name_from_offset(b, sym.name);
-
- i32 len = (sym.name.size == 0) ? 4 : (i32) sym.name.size;
-
- LOG(
- log_level,
- " %08llx %-04llx %s%s%s%s%s %.*s %s%-7s %s%s",
- section.data.offset + sym.value.offset,
- sym.value.size,
- *name != '\0' ? "\"" : "",
- !term_color ? "" :
- sym.bind == BIND_GLOBAL ? "\x1b[32m" :
- sym.bind == BIND_WEAK ? "\x1b[35m" :
- "\x1b[31m",
- *name != '\0' ? name : "<NONE>",
- !term_color ? "" : "\x1b[37m",
- *name != '\0' ? "\"" : "",
- 31 < len ? 1 : 32 - len,
- 31 < len ? " " : "........................................",
- !term_color ? "" :
- sym.type == SYM_PROC ? "\x1b[32m" :
- sym.type == SYM_DATA ? "\x1b[32m" :
- sym.type == SYM_COMMON ? "\x1b[33m" :
- sym.type == SYM_TLS ? "\x1b[35m" :
- sym.type == SYM_SECTION ? "\x1b[31m" :
- sym.type == SYM_SPECIFIC ? "\x1b[31m" :
- "",
- SYM_TYPE_NAMES[sym.type],
- !term_color ? (sym.section == 0 ? "undefined" : "") :
- sym.section == 0 ? (
- sym.bind == BIND_GLOBAL || sym.bind == BIND_WEAK ?
- "\x1b[33mundefined" : "\x1b[31mundefined") : "",
- !term_color ? "" : "\x1b[37m"
- );
- }
-
- LOG(log_level, " - -");
- break;
-
- case SEC_REL:
- case SEC_RELA: {
- LOG(log_level, " - -");
-
- for (i64 relx_index = 0; relx_index < section.num_entries; ++relx_index) {
- Elf_Relx_Entry relx = elf_relx(b, symtab, strtab, section.data, relx_index, section.type == SEC_RELA);
-
- LOG(
- log_level,
- " %-16s %08llx %-+5lld <= %s%08llx%s%s%s \"%s\"",
- REL_NAMES[relx.type],
- relx.offset,
- relx.addent,
- !term_color ? "" :
- relx.symbol.bind == BIND_WEAK ? "\x1b[33m" : "\x1b[32m",
- relx.symbol.value.offset + elf_section(b, relx.symbol.section).data.offset,
- !term_color ? "" : "\x1b[37m",
- !term_color ? "" :
- relx.symbol.type == SYM_DATA ? " \x1b[34mdata" :
- relx.symbol.type == SYM_COMMON ? " \x1b[32mdata" :
- relx.symbol.type == SYM_TLS ? " \x1b[34mdata" :
- relx.symbol.type == SYM_PROC ? " \x1b[34mproc" :
- relx.symbol.type == SYM_SECTION ? " \x1b[36msect" :
- relx.symbol.type == SYM_SPECIFIC ? " \x1b[34mspec" :
- " \x1b[33mnone",
- !term_color ? "" : "\x1b[37m",
- elf_name_from_offset(b, relx.symbol.name)
- );
- }
-
- LOG(log_level, " - -");
- } break;
-
- default:;
- }
- }
-
- LOG(log_level, "");
+ CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",);
+ CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",);
+ CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",);
+ CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",);
+
+ CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",);
+ CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",);
+ CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",);
+ CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",);
+ CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",);
+ CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",);
+ CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",);
+ CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",);
+
+ LAX(read_u64(LE, begin + 24, end) == 0, "Invalid entry point");
+ LAX(read_u64(LE, begin + 32, end) == 0, "Invalid program header offset");
+ LAX(read_u32(LE, begin + 48, end) == 0, "Invalid flags");
+ LAX(read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size");
+ LAX(read_u16(LE, begin + 54, end) == 0, "Invalid program header size");
+ LAX(read_u16(LE, begin + 56, end) == 0, "Invalid num program headers");
+ LAX(read_u16(LE, begin + 58, end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size");
}
i64 unit_write_in_memory(
@@ -2662,7 +2562,7 @@ i64 unit_write_in_memory(
emit_unit(pool, codegen, unit, arch);
u16 num_program_headers = 7;
- i64 program_offset = align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_ALIGNMENT);
+ i64 program_offset = align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_PAGE_SIZE);
i64 base_address = X86_64_BASE_ADDRESS;
i64 rx_code_address = base_address + program_offset;
@@ -2686,7 +2586,6 @@ i64 unit_write_in_memory(
Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index);
elf_checks(buf);
- // elf_dump(VERBOSE, buf, 1);
Offset_Num headers = elf_section_headers(buf);
@@ -2697,15 +2596,13 @@ i64 unit_write_in_memory(
c8 *name = elf_name_from_offset(buf, section.name);
- if (section.type == SEC_PROGBITS &&
- section.name.size == sizeof SECTION_GOT - 1 &&
- mem_eq(name, SECTION_GOT, sizeof SECTION_GOT - 1)) {
+ if (section.type == SEC_PROGBITS &&
+ STR_EQ(section.name.size, name, SECTION_GOT)) {
FAIL("Not implemented", 0);
}
- if (section.type == SEC_PROGBITS &&
- section.name.size == sizeof SECTION_PLT - 1 &&
- mem_eq(name, SECTION_PLT, sizeof SECTION_PLT - 1)) {
+ if (section.type == SEC_PROGBITS &&
+ STR_EQ(section.name.size, name, SECTION_PLT)) {
FAIL("Not implemented", 0);
}
@@ -2716,64 +2613,51 @@ i64 unit_write_in_memory(
FAIL("Not implemented", 0);
}
- if (section.type == SEC_PROGBITS && section.exec &&
- section.name.size == sizeof SECTION_INIT - 1 &&
- mem_eq(name, SECTION_INIT, sizeof SECTION_INIT - 1)) {
+ if (section.type == SEC_PROGBITS && section.exec &&
+ STR_EQ(section.name.size, name, SECTION_INIT)) {
FAIL("Not implemented", 0);
}
- if (section.type == SEC_PROGBITS && section.exec &&
- section.name.size == sizeof SECTION_FINI - 1 &&
- mem_eq(name, SECTION_FINI, sizeof SECTION_FINI - 1)) {
+ if (section.type == SEC_PROGBITS && section.exec &&
+ STR_EQ(section.name.size, name, SECTION_FINI)) {
FAIL("Not implemented", 0);
}
if (section.exec) {
CHECK(!section.write, "Not implemented", 0);
- linker->section_offsets[num_sections_total] = rx_code_size;
- linker->section_addresses[num_sections_total] = rx_code_size;
+ linker->sections[num_sections_total].offset = rx_code_size;
+ linker->sections[num_sections_total].address = rx_code_size;
rx_code_size += align(section.data.size, X86_64_ALIGNMENT);
continue;
}
if (section.write && section.type == SEC_NOBITS) {
- linker->section_addresses[num_sections_total] = rw_zval_size;
+ linker->sections[num_sections_total].address = rw_zval_size;
rw_zval_size += align(section.data.size, X86_64_ALIGNMENT);
continue;
}
if (section.write) {
- linker->section_offsets[num_sections_total] = rw_data_size;
- linker->section_addresses[num_sections_total] = rw_data_size;
+ linker->sections[num_sections_total].offset = rw_data_size;
+ linker->sections[num_sections_total].address = rw_data_size;
rw_data_size += align(section.data.size, X86_64_ALIGNMENT);
continue;
}
- linker->section_offsets[num_sections_total] = ro_data_size;
- linker->section_addresses[num_sections_total] = ro_data_size;
+ linker->sections[num_sections_total].offset = ro_data_size;
+ linker->sections[num_sections_total].address = ro_data_size;
ro_data_size += align(section.data.size, X86_64_ALIGNMENT);
}
}
- rx_code_size = align(rx_code_size, X86_64_PAGE_SIZE);
- rw_zval_size = align(rw_zval_size, X86_64_PAGE_SIZE);
- rw_data_size = align(rw_data_size, X86_64_PAGE_SIZE);
- ro_data_size = align(ro_data_size, X86_64_PAGE_SIZE);
- rw_got_size = align(rw_got_size, X86_64_PAGE_SIZE);
- rw_dynamic_size = align(rw_dynamic_size, X86_64_PAGE_SIZE);
-
- i64 rw_zval_address = rx_code_address + rx_code_size;
- i64 rw_data_address = rw_zval_address + rw_zval_size;
- i64 ro_data_address = rw_data_address + rw_data_size;
- i64 rw_got_address = ro_data_address + ro_data_size;
- i64 rw_dynamic_address = rw_got_address + rw_got_size;
-
- i64 rx_code_offset = program_offset;
- i64 rw_data_offset = rx_code_offset + rx_code_size;
- i64 ro_data_offset = rw_data_offset + rw_data_size;
- i64 rw_got_offset = ro_data_offset + ro_data_size;
- i64 rw_dynamic_offset = rw_got_offset + rw_got_size;
+ i64 rw_zval_address = align(rx_code_address + rx_code_size, X86_64_PAGE_SIZE);
+ i64 rw_data_address = align(rw_zval_address + rw_zval_size, X86_64_PAGE_SIZE);
+ i64 ro_data_address = align(rw_data_address + rw_data_size, X86_64_PAGE_SIZE);
+
+ i64 rx_code_offset = program_offset;
+ i64 rw_data_offset = align(rx_code_offset + rx_code_size, X86_64_PAGE_SIZE);
+ i64 ro_data_offset = align(rw_data_offset + rw_data_size, X86_64_PAGE_SIZE);
for (i64 elf_index = 0, sec_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) {
Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index);
@@ -2788,16 +2672,16 @@ i64 unit_write_in_memory(
continue;
if (section.exec) {
- linker->section_offsets[sec_index_global] += rx_code_offset + codegen->offset_code;
- linker->section_addresses[sec_index_global] += rx_code_address + codegen->offset_code;
+ linker->sections[sec_index_global].offset += rx_code_offset + codegen->offset_code;
+ linker->sections[sec_index_global].address += rx_code_address + codegen->offset_code;
} else if (section.write && section.type == SEC_NOBITS) {
- linker->section_addresses[sec_index_global] += rw_zval_address;
+ linker->sections[sec_index_global].address += rw_zval_address;
} else if (section.write) {
- linker->section_offsets[sec_index_global] += rw_data_offset;
- linker->section_addresses[sec_index_global] += rw_data_address;
+ linker->sections[sec_index_global].offset += rw_data_offset;
+ linker->sections[sec_index_global].address += rw_data_address;
} else if (section.data.size > 0) {
- linker->section_offsets[sec_index_global] += ro_data_offset + codegen->offset_ro_data;
- linker->section_addresses[sec_index_global] += ro_data_address + codegen->offset_ro_data;
+ linker->sections[sec_index_global].offset += ro_data_offset + codegen->offset_ro_data;
+ linker->sections[sec_index_global].address += ro_data_address + codegen->offset_ro_data;
}
}
}
@@ -2826,19 +2710,18 @@ i64 unit_write_in_memory(
if (sym.section == 65522) // common
continue;
- i64 sym_section = 0;
i64 sym_address = sym.value.offset;
if (sym.section != 65521 && elf_section(buf, sym.section).alloc) {
- sym_section = sec_index_global + sym.section - 1;
- CHECK(sym_section < num_sections_total, "Buffer overflow",);
- CHECK(linker->section_addresses[sym_section] != 0, "Sanity",);
- sym_address = linker->section_addresses[sym_section] + sym.value.offset;
+ i64 sym_section = sec_index_global + sym.section - 1;
+ CHECK(sym_section < num_sections_total, "Buffer overflow",);
+ CHECK(linker->sections[sym_section].address != 0, "Sanity",);
+ sym_address += linker->sections[sym_section].address;
}
CHECK(num_symbols < linker->max_num_symbols, "Too many symbols",);
- linker->symbols[num_symbols++] = (Symbol_Entry) {
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
.name_size = sym.name.size,
.name = sym_name,
.address = sym_address,
@@ -2858,84 +2741,37 @@ i64 unit_write_in_memory(
// ==========================================================
//
- // TODO Add runtime library symbols
- //
- // __ehdr_start
- // _GLOBAL_OFFSET_TABLE_
- // _DYNAMIC
- //
- // _Unwind_Resume
- // _Unwind_Backtrace
- // _Unwind_ForcedUnwind
- // _Unwind_GetIP
- // _Unwind_GetCFA
- //
- // _init
- // _fini
- // _end
- // _dl_rtld_map
- // __pthread_initialize_minimal
- // __init_array_start
- // __init_array_end
- // __fini_array_start
- // __fini_array_end
- // __rela_iplt_start
- // __rela_iplt_end
- // __preinit_array_start
- // __preinit_array_end
- // __start___libc_atexit
- // __stop___libc_atexit
- // __stop___libc_IO_vtables
- // __start___libc_IO_vtables
- // __start___libc_subfreeres
- // __stop___libc_subfreeres
- // __start___libc_freeres_ptrs
- // __stop___libc_freeres_ptrs
- // __gcc_personality_v0
- //
- // __addtf3
- // __subtf3
- // __multf3
- // __divtf3
- // __eqtf2
- // __lttf2
- // __letf2
- // __gttf2
- // __getf2
- // __unordtf2
-
- CHECK(num_symbols + 2 <= linker->max_num_symbols, "Too many symbols",);
-
- linker->symbols[num_symbols++] = (Symbol_Entry) {
+ // Add runtime library symbols
+
+ CHECK(num_symbols + 3 <= linker->max_num_symbols, "Too many symbols",);
+
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
.name_size = 12,
.name = "__ehdr_start",
.address = base_address,
.size = ELF_HEADER_SIZE,
};
- // Add the dummy _GLOBAL_OFFSET_TABLE_ segment
+ i64 sym_index_got = num_symbols;
- linker->symbols[num_symbols++] = (Symbol_Entry) {
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
.name_size = 21,
.name = "_GLOBAL_OFFSET_TABLE_",
- .address = rw_got_address,
- .size = ELF_GOT_ENTRY_SIZE,
};
- // Add the dummy _DYNAMIC segment
+ i64 sym_index_dynamic = num_symbols;
- linker->symbols[num_symbols++] = (Symbol_Entry) {
- .name_size = 8,
- .name = "_DYNAMIC",
- .address = rw_dynamic_address,
- .size = ELF_DYNAMIC_ENTRY_SIZE,
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
+ .name_size = 8,
+ .name = "_DYNAMIC",
+ .got_offset = 0,
};
- // ==============================================================
+ // ==========================================================
//
- // Apply relocations
+ // Process relocations and build global offset table
- for (i64 elf_index = 0, sec_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) {
+ for (i64 elf_index = 0, sec_index_global = 0, rel_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) {
Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index);
i64 num_sections = elf_section_headers(buf).num;
@@ -2951,24 +2787,25 @@ i64 unit_write_in_memory(
i64 dst_index_global = sec_index_global + dst_index - 1;
CHECK(dst_index_global >= 0 && dst_index_global < linker->max_num_sections, "Buffer overflow",);
- for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index) {
- Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA);
-
- c8 *sym_name = elf_name_from_offset(buf, relx.symbol.name);
+ for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index, ++rel_index_global) {
+ Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA);
+ c8 * sym_name = elf_name_from_offset(buf, relx.symbol.name);
+ i64 sym_index_global = num_symbols;
- Symbol_Entry symbol = {0};
+ CHECK(num_symbols < linker->max_num_symbols, "Out of memory",);
if (relx.symbol.section == 0) {
b8 found = 0;
for (i64 i = 0; i < pool->num_entities; ++i)
- if (pool->entities[i].is_enabled &&
- pool->entities[i].type == ENTITY_PROC &&
- pool->entities[i].proc.name_size == relx.symbol.name.size &&
- mem_eq(pool->entities[i].proc.name, sym_name, relx.symbol.name.size)) {
+ if (pool->entities[i].is_enabled &&
+ pool->entities[i].type == ENTITY_PROC &&
+ str_eq(pool->entities[i].proc.name_size,
+ pool->entities[i].proc.name,
+ relx.symbol.name.size, sym_name)) {
CHECK(pool->entities[i].proc.codegen.emit_done, "No proc code",);
- symbol = (Symbol_Entry) {
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
.address = rx_code_address + pool->entities[i].proc.codegen.offset,
.size = relx.symbol.value.size,
};
@@ -2979,39 +2816,110 @@ i64 unit_write_in_memory(
if (!found)
for (i64 i = 0; i < num_symbols; ++i)
- if (linker->symbols[i].name_size == relx.symbol.name.size &&
- mem_eq(
- linker->symbols[i].name,
- sym_name,
- relx.symbol.name.size
- )) {
- symbol = linker->symbols[i];
-
+ if (str_eq(linker->symbols[i].name_size, linker->symbols[i].name, relx.symbol.name.size, sym_name)) {
+ sym_index_global = i;
found = 1;
break;
}
- if (!found &&
- find_str_in_table(
- linker->not_found_buffer,
- linker->not_found_buffer + not_found_size,
- sym_name,
- sym_name + relx.symbol.name.size
- ) == NULL) {
- // FIXME
+ if (!found) {
LOG(WARNING, "Undefined symbol: %s", sym_name);
CHECK(not_found_size + relx.symbol.name.size + 1 <= linker->max_not_found_size, "Out of memory",);
mem_cpy(linker->not_found_buffer + not_found_size, sym_name, relx.symbol.name.size);
not_found_size += relx.symbol.name.size + 1;
+
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
+ .address = 0,
+ .size = 0,
+ .name_size = relx.symbol.name.size,
+ .name = sym_name
+ };
}
} else {
+ i64 const SEARCH_RANGE = 1024;
+
i64 src_index_global = sec_index_global + relx.symbol.section - 1;
+ i64 address = relx.symbol.value.offset + linker->sections[src_index_global].address;
- symbol = (Symbol_Entry) {
- .address = relx.symbol.value.offset + linker->section_addresses[src_index_global],
- .size = relx.symbol.value.size,
- };
+ b8 found = 0;
+
+ for (i64 k = 1; k <= num_symbols && k <= SEARCH_RANGE; ++k)
+ if (linker->symbols[num_symbols - k].address == address) {
+ sym_index_global = num_symbols - k;
+ found = 1;
+ break;
+ }
+
+ if (!found)
+ linker->symbols[num_symbols++] = (Link_Sym_Entry) {
+ .address = address,
+ .size = relx.symbol.value.size,
+ };
+ }
+
+ if (sym_index_global >= num_symbols)
+ LOG(ERROR, "Symbol: %s", sym_name);
+ CHECK(sym_index_global < num_symbols, "Symbol not found",);
+ CHECK(rel_index_global < linker->max_num_rels, "Out of memory",);
+
+ linker->rels[rel_index_global].symbol = sym_index_global;
+
+ switch (relx.type) {
+ case R_X86_64_GOT32:
+ case R_X86_64_GOTPCREL:
+ case R_X86_64_GOTPCRELX:
+ case R_X86_64_REX_GOTPCRELX:
+ if (
+ !STR_EQ(relx.symbol.name.size, sym_name, "_DYNAMIC") &&
+ linker->symbols[sym_index_global].got_offset == 0) {
+ linker->symbols[sym_index_global].got_offset = rw_got_size;
+ rw_got_size += ELF_GOT_ENTRY_SIZE;
+ }
+ break;
+
+ default:;
}
+ }
+ }
+
+ sec_index_global += num_sections - 1;
+ }
+
+ i64 rw_got_address = align(ro_data_address + ro_data_size, X86_64_PAGE_SIZE);
+ i64 rw_dynamic_address = align(rw_got_address + rw_got_size, X86_64_PAGE_SIZE);
+
+ i64 rw_got_offset = align(ro_data_offset + ro_data_size, X86_64_PAGE_SIZE);
+ i64 rw_dynamic_offset = align(rw_got_offset + rw_got_size, X86_64_PAGE_SIZE);
+
+ linker->symbols[sym_index_got] .address = rw_got_address;
+ linker->symbols[sym_index_got] .size = rw_got_size;
+ linker->symbols[sym_index_dynamic].address = rw_dynamic_address;
+ linker->symbols[sym_index_dynamic].size = rw_dynamic_size;
+
+ // ==============================================================
+ //
+ // Apply relocations
+
+ for (i64 elf_index = 0, sec_index_global = 0, rel_index_global = 0; elf_index < linker->num_obj_files; ++elf_index) {
+ Buffer_Context buf = elf_buffer_context(pool, linker, linker->num_obj_files, elf_index);
+ i64 num_sections = elf_section_headers(buf).num;
+
+ Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
+ Offset_Size symtab = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data;
+
+ for (i64 sec_index = 1; sec_index < num_sections; ++sec_index) {
+ Elf_Section_Header src_sec = elf_section(buf, sec_index);
+ if (src_sec.type != SEC_REL && src_sec.type != SEC_RELA)
+ continue;
+
+ i64 dst_index = elf_find_related_section_index(buf, sec_index);
+ i64 dst_index_global = sec_index_global + dst_index - 1;
+ CHECK(dst_index_global >= 0 && dst_index_global < linker->max_num_sections, "Buffer overflow",);
+
+ for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index, ++rel_index_global) {
+ Elf_Relx_Entry relx = elf_relx(buf, symtab, strtab, src_sec.data, entry_index, src_sec.type == SEC_RELA);
+ c8 * sym_name = elf_name_from_offset(buf, relx.symbol.name);
+ Link_Sym_Entry symbol = linker->symbols[linker->rels[rel_index_global].symbol];
u8 *dst = buf.begin + elf_section(buf, dst_index).data.offset + relx.offset;
@@ -3021,10 +2929,10 @@ i64 unit_write_in_memory(
i64 A = relx.addent;
// Represents the base address at which a shared object has been loaded into memory during execution. Generally, a shared object is built with a 0 base virtual address, but the execution address will be different.
- i64 B = linker->section_addresses[dst_index_global];
+ i64 B = linker->sections[dst_index_global].address;
// Represents the place (section offset or address) of the storage unit being relocated (computed using r_offset).
- i64 P = linker->section_addresses[dst_index_global] + relx.offset;
+ i64 P = linker->sections[dst_index_global].address + relx.offset;
// Represents the value of the symbol whose index resides in the relocation entry.
i64 S = symbol.address;
@@ -3036,15 +2944,14 @@ i64 unit_write_in_memory(
i64 GOT = rw_got_address;
// Represents the offset into the global offset table at which the relocation entry's symbol will reside during execution.
- i64 G = 8; // TODO
+ i64 G = symbol.got_offset;
// Represents the place (section offset or address) of the Procedure Linkage Table entry for a symbol.
i64 L = S; // TODO
switch (relx.type) {
- #define SKIP_(x) \
- if (relx.symbol.name.size == sizeof(#x) - 1 && \
- mem_eq(sym_name, #x, sizeof(#x) - 1)) \
+ #define SKIP_(x) \
+ if (str_eq(relx.symbol.name.size, sym_name, sizeof(#x) - 1, #x)) \
break;
case R_X86_64_64:
@@ -3103,46 +3010,46 @@ i64 unit_write_in_memory(
#define TODO_ FAIL("Not implemented", 0)
case R_X86_64_NONE: /* Do nothing */ break;
- case R_X86_64_64: ADD_(64, S + A); break; // 64, S + A
- case R_X86_64_PC32: ADD_(32, S + A - P); break; // 32, S + A - P
- case R_X86_64_GOT32: TODO_; break; // 32, G + A
- case R_X86_64_PLT32: ADD_(32, L + A - P); break; // 32, L + A - P
+ case R_X86_64_64: ADD_(64, S + A); break;
+ case R_X86_64_PC32: ADD_(32, S + A - P); break;
+ case R_X86_64_GOT32: TODO_; break;
+ case R_X86_64_PLT32: ADD_(32, L + A - P); break;
case R_X86_64_COPY: /* Do nothing */ break;
- case R_X86_64_GLOB_DAT: TODO_; break; // 64, S
- case R_X86_64_JUMP_SLOT: TODO_; break; // 64, S
- case R_X86_64_RELATIVE: TODO_; break; // 64, B + A
- case R_X86_64_GOTPCREL: ADD_(32, G + GOT + A - P); break; // 32, G + GOT + A - P
- case R_X86_64_32: TODO_; break; // 32, S + A
- case R_X86_64_32S: TODO_; break; // 32, S + A
- case R_X86_64_16: TODO_; break; // 16, S + A
- case R_X86_64_PC16: TODO_; break; // 16, S + A - P
- case R_X86_64_8: TODO_; break; // 8, S + A
- case R_X86_64_PC8: TODO_; break; // 8, S + A - P
+ case R_X86_64_GLOB_DAT: ADD_(64, S); break;
+ case R_X86_64_JUMP_SLOT: ADD_(64, S); break;
+ case R_X86_64_RELATIVE: ADD_(64, B + A); break;
+ case R_X86_64_GOTPCREL: ADD_(32, GOT + G + A - P); break;
+ case R_X86_64_32: ADD_(32, S + A); break;
+ case R_X86_64_32S: ADD_(32, S + A); break;
+ case R_X86_64_16: ADD_(16, S + A); break;
+ case R_X86_64_PC16: ADD_(16, S + A - P); break;
+ case R_X86_64_8: ADD_(8, S + A); break;
+ case R_X86_64_PC8: ADD_(8, S + A - P); break;
case R_X86_64_DTPMOD64: TODO_; break;
case R_X86_64_DTPOFF64: TODO_; break;
case R_X86_64_TPOFF64: TODO_; break;
case R_X86_64_TLSGD: TODO_; break;
case R_X86_64_TLSLD: TODO_; break;
case R_X86_64_DTPOFF32: TODO_; break;
- case R_X86_64_GOTTPOFF: ADD_(32, S - GOT); break; // 32, S - GOT
- case R_X86_64_TPOFF32: ADD_(32, S + A - B); break; // 32, S + A - B
- case R_X86_64_PC64: TODO_; break; // 64, S + A - P
+ case R_X86_64_GOTTPOFF: ADD_(32, S - GOT); break;
+ case R_X86_64_TPOFF32: ADD_(32, S + A - B); break;
+ case R_X86_64_PC64: ADD_(64, S + A - P); break;
case R_X86_64_GOTOFF64: TODO_; break;
- case R_X86_64_GOTPC32: TODO_; break; // 32, GOT + A - P
+ case R_X86_64_GOTPC32: ADD_(32, GOT + A - P); break;
case R_X86_64_GOT64: TODO_; break;
case R_X86_64_GOTPCREL64: TODO_; break;
- case R_X86_64_GOTPC64: TODO_; break; // 64, GOT + A - P
+ case R_X86_64_GOTPC64: ADD_(64, GOT + A - P); break;
case R_X86_64_GOTPLT64: TODO_; break;
case R_X86_64_PLTOFF64: TODO_; break;
- case R_X86_64_SIZE32: TODO_; break; // 32, Z + A
- case R_X86_64_SIZE64: TODO_; break; // 64, Z + A
+ case R_X86_64_SIZE32: ADD_(32, Z + A); break;
+ case R_X86_64_SIZE64: ADD_(64, Z + A); break;
case R_X86_64_GOTPC32_TLSDESC: TODO_; break;
case R_X86_64_TLSDESC_CALL: TODO_; break;
case R_X86_64_TLSDESC: TODO_; break;
case R_X86_64_IRELATIVE: TODO_; break;
case R_X86_64_RELATIVE64: TODO_; break;
- case R_X86_64_GOTPCRELX: TODO_; break;
- case R_X86_64_REX_GOTPCRELX: TODO_; break;
+ case R_X86_64_GOTPCRELX: ADD_(32, GOT + G + A - P); break;
+ case R_X86_64_REX_GOTPCRELX: ADD_(32, GOT + G + A - P); break;
default: FAIL("Unknown relocation type", 0);
@@ -3239,10 +3146,12 @@ i64 unit_write_in_memory(
i64 entry = rx_code_address + codegen->entry_point;
if (!codegen->has_entry) {
+ // TODO Explicitly declare _start proc
+
b8 found = 0;
- for (i64 sym_index = 0; sym_index < num_symbols; ++sym_index) if (linker->symbols[sym_index].name_size == 6 &&
- mem_eq(linker->symbols[sym_index].name, "_start", 6)) {
+ for (i64 sym_index = 0; sym_index < num_symbols; ++sym_index)
+ if (STR_EQ(linker->symbols[sym_index].name_size, linker->symbols[sym_index].name, "_start")) {
entry = linker->symbols[sym_index].address;
found = 1;
break;
@@ -3401,7 +3310,7 @@ i64 unit_write_in_memory(
for (i64 sec_index = 1; sec_index < headers.num; ++sec_index, ++sec_index_global) {
Elf_Section_Header section = elf_section(buf, sec_index);
- i64 offset = linker->section_offsets[sec_index_global];
+ i64 offset = linker->sections[sec_index_global].offset;
if (offset == 0 ||
!section.alloc ||
section.data.size == 0)
@@ -3417,6 +3326,16 @@ i64 unit_write_in_memory(
write_u64(LE, rw_dynamic_address, o + rw_got_offset, o_end);
+ // Fill the global offset table.
+ // Every symbol that received a slot while relocations were processed is
+ // written at its own got_offset, because that is the value the relocation
+ // formulas use for G. Slots are handed out in relocation order, not in
+ // symbol-table order, so a running counter over the symbol table could
+ // place an address into another symbol's slot.
+ // A got_offset of 0 means "no slot": the entry at offset 0 is written
+ // separately with the _DYNAMIC address.
+ for (i64 i = 0; i < num_symbols; ++i) {
+   Link_Sym_Entry *sym = linker->symbols + i;
+   if (sym->got_offset == 0)
+     continue;
+
+   // The whole entry has to fit inside the table.
+   CHECK(sym->got_offset + ELF_GOT_ENTRY_SIZE <= rw_got_size, "Sanity",);
+   write_u64(LE, sym->address, o + rw_got_offset + sym->got_offset, o_end);
+ }
+
// ==============================================================
return output_size;
@@ -3552,13 +3471,13 @@ void unit_write(
mem_set(codegen->rels, 0, codegen->max_num_rels * sizeof *codegen->rels);
mem_set(codegen->buffer_code, 0, codegen->max_code_size);
- mem_set(codegen->buffer_ro_data, 0, codegen->max_ro_data_size);
+ mem_set(codegen->buffer_ro_data, 0, codegen->max_ro_data_size);
mem_set(linker->obj_file_buffer, 0, linker->max_obj_file_size);
mem_set(linker->dependencies_buffer, 0, linker->max_dependencies_size);
mem_set(linker->obj_file_offsets, 0, linker->max_num_obj_files * sizeof *linker->obj_file_offsets);
- mem_set(linker->section_offsets, 0, linker->max_num_sections * sizeof *linker->section_offsets);
- mem_set(linker->section_addresses, 0, linker->max_num_sections * sizeof *linker->section_addresses);
+ mem_set(linker->sections, 0, linker->max_num_sections * sizeof *linker->sections);
mem_set(linker->symbols, 0, linker->max_num_symbols * sizeof *linker->symbols);
+ mem_set(linker->rels, 0, linker->max_num_rels * sizeof *linker->rels);
mem_set(linker->not_found_buffer, 0, linker->max_not_found_size);
mem_set(linker->output_buffer, 0, linker->max_output_size);
}
@@ -3830,19 +3749,20 @@ Linker_Context g_linker = {
.max_num_obj_files = MAX_NUM_OBJECT_FILES,
.max_num_sections = MAX_NUM_SECTIONS,
.max_num_symbols = MAX_NUM_SYMBOLS,
+ .max_num_rels = MAX_NUM_LINK_RELS,
.max_not_found_size = MAX_NOT_FOUND_SIZE,
.max_output_size = MAX_OUTPUT_SIZE,
.num_obj_files = 0,
- .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0},
- .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0},
- .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0},
- .section_offsets = (i64[MAX_NUM_SECTIONS]) {0},
- .section_addresses = (i64[MAX_NUM_SECTIONS]) {0},
- .symbols = (Symbol_Entry[MAX_NUM_SYMBOLS]) {0},
- .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0},
- .output_buffer = (u8[MAX_OUTPUT_SIZE]) {0},
+ .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0},
+ .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0},
+ .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0},
+ .sections = (Link_Sec_Entry[MAX_NUM_SECTIONS]) {0},
+ .symbols = (Link_Sym_Entry[MAX_NUM_SYMBOLS]) {0},
+ .rels = (Link_Rel_Entry[MAX_NUM_LINK_RELS]) {0},
+ .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0},
+ .output_buffer = (u8[MAX_OUTPUT_SIZE]) {0},
};
// Handy procedures
@@ -4031,34 +3951,13 @@ b8 link_with_libc(void) {
);
}
- // // Add the entry point.
- // i64 entry = p_new_entry(u);
- // {
- // // Initialize libc
- // N_CALL_BY_NAME(
- // entry,
- // "__libc_start_main",
- // n_ref(entry, mainproc), // main
- // n_i32(entry, 0), // argc
- // n_ref(entry, n_ptr(entry, 0)), // argv
- // n_ptr(entry, 0), // init
- // n_ptr(entry, 0), // fini
- // n_ptr(entry, 0), // rtld_fini
- // n_ptr(entry, 0) // stack_end
- // );
-
- // // Return
- // n_ret(entry, 0, NULL);
- // }
-
// ============================================================
//
// Compile and link
- // Add a static library.
+ // Add dependencies
l_static(u, "c");
l_object(u, "crt1");
- // l_static(u, "test");
// Write the compilation unit into an executable file.
u_elf_x86_64(u, "test_foo");