diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2024-07-19 16:15:33 +0200 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2024-07-19 16:15:33 +0200 |
commit | 9208e3a07ebf138981877240eb63867942d52e42 (patch) | |
tree | d2796529273797ce969030b20d98472449dee391 | |
parent | 257e660d7c4ec0d2e122c5b1675c7823041c83c9 (diff) | |
download | bxgen-9208e3a07ebf138981877240eb63867942d52e42.zip |
Relocating symbols
-rwxr-xr-x | bxgen.c | 332 |
1 files changed, 181 insertions, 151 deletions
@@ -73,12 +73,13 @@ #/ SRC=${0##*./} BIN=${SRC%.*} -gcc \ - -Wno-old-style-declaration -Wno-missing-braces \ - -Wno-unused-variable -Wno-unused-but-set-variable \ - -Wall -Wextra -Werror -pedantic \ - -O0 -fsanitize=undefined,address,leak -mshstk \ - -o $BIN $SRC && \ +gcc \ + -Wno-old-style-declaration -Wno-missing-braces \ + -Wno-unused-variable -Wno-unused-but-set-variable \ + -Wno-unused-parameter \ + -Wall -Wextra -Werror -pedantic \ + -O0 -fsanitize=undefined,address,leak -mshstk \ + -o $BIN $SRC && \ ./$BIN $@ exit $? # */ #endif @@ -148,10 +149,13 @@ enum { // Limits // - MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, - MAX_NUM_OBJECT_FILES = 10 * 1024, - MAX_DEPENDENCIES_SIZE = 300 * 1024 * 1024, STRING_TABLE_ALIGNMENT = 16, // TODO + + MAX_NUM_OBJECT_FILES = 10 * 1024, + MAX_NUM_SECTIONS = 100 * 1024, + MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB + MAX_DEPENDENCIES_SIZE = 300 * 1024 * 1024, // 300 MB + MAX_PATH_SIZE = 10 * 1024, MAX_LITERAL_SIZE = 400, MAX_NAME_SIZE = 80, @@ -358,6 +362,8 @@ typedef struct { u8 * dependencies_buffer; i64 max_num_obj_files; i64 *obj_file_offsets; + i64 max_num_sections; + i64 *section_offsets; } Pool; // ================================================================ @@ -1512,6 +1518,21 @@ i64 ar_find_symbol_offset_by_name( BX_FAIL("Symbol not found", 0); } +Buffer_Context elf_buffer_context( + Pool *pool, + i64 num_obj_files, + i64 elf_index +) { + return (Buffer_Context) { + .begin = pool->dependencies_buffer, + .end = pool->dependencies_buffer + pool->obj_file_offsets[num_obj_files], + .elf = { + .offset = pool->obj_file_offsets[elf_index], + .size = pool->obj_file_offsets[elf_index + 1] - pool->obj_file_offsets[elf_index], + }, + }; +} + Offset_Num elf_section_headers( Buffer_Context b ) { @@ -1594,7 +1615,7 @@ i64 elf_find_section_index_by_name( return i; } - BX_FAIL("Section not found", 0); + return 0; } Section_Header elf_section( @@ -1639,7 +1660,8 @@ Section_Header elf_find_section_by_name( c8 * name, i64 
name_size ) { - return elf_section(b, elf_find_section_index_by_name(b, name, name_size)); + i64 index = elf_find_section_index_by_name(b, name, name_size); + return index == 0 ? (Section_Header) {0} : elf_section(b, index); } c8 *elf_name_from_offset( @@ -1811,12 +1833,65 @@ Symbol_Entry elf_find_symbol_by_name( BX_FAIL("Not found", (Symbol_Entry) {0}); } +void elf_checks(Buffer_Context b) { + u8 *begin = b.begin + b.elf.offset; + u8 *end = begin + b.elf.size; + + u8 osabi = read_u8(LE, begin + 7, end); + + BX_CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",); + BX_CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",); + BX_CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",); + BX_CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",); + + BX_CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",); + BX_CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",); + BX_CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",); + BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); + BX_CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",); + BX_CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",); + BX_CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",); + BX_CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",); + + BX_LAX( read_u64(LE, begin + 24, end) == 0, "Invalid entry point"); + BX_LAX( read_u64(LE, begin + 32, end) == 0, "Invalid program header offset"); + BX_LAX( read_u32(LE, begin + 48, end) == 0, "Invalid flags"); + BX_LAX( read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size"); + BX_LAX( read_u16(LE, begin + 54, end) == 0, "Invalid program header size"); + BX_LAX( read_u16(LE, begin + 56, end) == 0, "Invalid num program headers"); + BX_LAX( read_u16(LE, begin + 58, end) == 
ELF_SECTION_HEADER_SIZE, "Invalid section header size"); +} + +void elf_relocate_symbols( + Buffer_Context b, + Section_Header symtab, + i64 sec_index, + i64 dst_address +) { + u8 *begin = b.begin + symtab.data.offset; + u8 *end = begin + symtab.data.size; + + for (i64 i = 0; i < symtab.num_entries; ++i) { + u8 *sym_begin = begin + i * ELF_SYMBOL_ENTRY_SIZE; + if ((i64) read_u16(LE, sym_begin + 6, end) == sec_index) + write_i64(LE, dst_address + read_i64(LE, sym_begin + 8, end), sym_begin + 8, end); + } +} + +void elf_apply_relocations( + Buffer_Context b, + i64 sec_index, + i64 dst_address +) { + //BX_LOG(WARNING, "TODO"); +} + void elf_dump(u32 log_level, Buffer_Context b) { - Offset_Num section_headers = elf_section_headers(b); - Offset_Size symbol_names = {0}; - Offset_Size symbols = {0}; + Offset_Num headers = elf_section_headers(b); + Offset_Size strtab = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + Offset_Size symtab = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; - for (i64 sec_index = 0; sec_index < section_headers.num; ++sec_index) { + for (i64 sec_index = 0; sec_index < headers.num; ++sec_index) { Section_Header section = elf_section(b, sec_index); c8 *name = elf_name_from_offset(b, section.name); @@ -1844,16 +1919,10 @@ void elf_dump(u32 log_level, Buffer_Context b) { switch (section.type) { case SEC_SYMTAB: - if (symbols.offset == 0) - symbols = section.data; - - if (symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - BX_LOG(log_level, " - -"); for (i64 sym_index = 0; sym_index < section.num_entries; ++sym_index) { - Symbol_Entry sym = elf_symbol(b, section.data, symbol_names, (u16) sym_index); + Symbol_Entry sym = elf_symbol(b, section.data, strtab, (u16) sym_index); c8 *name = elf_name_from_offset(b, sym.name); @@ -1884,17 +1953,12 @@ void elf_dump(u32 log_level, Buffer_Context b) { break; case SEC_RELA: { - if 
(symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - if (symbols.offset == 0) - symbols = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; - Offset_Size dst = elf_find_related_data(b, sec_index); BX_LOG(log_level, " - -"); for (i64 rela_index = 0; rela_index < section.num_entries; ++rela_index) { - Rela_Entry rela = elf_rela(b, symbols, symbol_names, section.data, dst, rela_index); + Rela_Entry rela = elf_rela(b, symtab, strtab, section.data, dst, rela_index); BX_LOG( log_level, @@ -1919,17 +1983,12 @@ void elf_dump(u32 log_level, Buffer_Context b) { } break; case SEC_REL: { - if (symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - if (symbols.offset == 0) - symbols = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; - Offset_Size dst = elf_find_related_data(b, sec_index); BX_LOG(log_level, " - -"); for (i64 rel_index = 0; rel_index < section.num_entries; ++rel_index) { - Rel_Entry rel = elf_rel(b, symbols, symbol_names, section.data, dst, rel_index); + Rel_Entry rel = elf_rel(b, symtab, strtab, section.data, dst, rel_index); BX_LOG( log_level, @@ -1979,12 +2038,14 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data 0x0f, 0x05, // syscall }; - i64 code_offset = bx_align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE, X86_64_ALIGNMENT); + i64 num_program_headers = 1; + + i64 data_offset = bx_align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_ALIGNMENT); i64 code_size = bx_align(sizeof code, X86_64_ALIGNMENT); i64 entry_offset = 0; i64 base_address = X86_64_BASE_ADDRESS; - i64 code_address = base_address + code_offset; + i64 code_address = base_address + data_offset; i64 entry = code_address + entry_offset; Symbol_Entry sym_printf = {0}; @@ -1998,8 +2059,8 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, 
void *io_user_data // // Reading dependencies - i64 im_num = 0; - i64 im_size = 0; + i64 num_obj_files = 0; + i64 obj_files_size = 0; // Read all dependency files into the memory // @@ -2057,14 +2118,14 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data i64 delta_size = bx_align(size, X86_64_ALIGNMENT); - BX_CHECK(im_size + delta_size < pool->max_dependencies_size, "Out of memory",); - BX_CHECK(im_num + 1 < pool->max_num_obj_files, "Out of memory",); + BX_CHECK(obj_files_size + delta_size < pool->max_dependencies_size, "Out of memory",); + BX_CHECK(num_obj_files + 1 < pool->max_num_obj_files, "Out of memory",); - bx_mem_cpy(pool->dependencies_buffer + im_size, f_data, size); + bx_mem_cpy(pool->dependencies_buffer + obj_files_size, f_data, size); - pool->obj_file_offsets[im_num] = im_size; - im_size += delta_size; - pool->obj_file_offsets[++im_num] = im_size; + pool->obj_file_offsets[num_obj_files] = obj_files_size; + obj_files_size += delta_size; + pool->obj_file_offsets[++num_obj_files] = obj_files_size; } f_begin = f_data + size; @@ -2073,143 +2134,110 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data // ========================================================== // - // Process ELF object files - - for (i64 elf_index = 0; elf_index < im_num; ++elf_index) { - u8 *elf_begin = pool->dependencies_buffer + pool->obj_file_offsets[elf_index]; - u8 *elf_end = pool->dependencies_buffer + pool->obj_file_offsets[elf_index + 1]; - - u8 osabi = read_u8(LE, elf_begin + 7, elf_end); - - BX_CHECK( read_u8 (LE, elf_begin, elf_end) == ELF_MAGIC[0], "Invalid ELF file",); - BX_CHECK( read_u8 (LE, elf_begin + 1, elf_end) == ELF_MAGIC[1], "Invalid ELF file",); - BX_CHECK( read_u8 (LE, elf_begin + 2, elf_end) == ELF_MAGIC[2], "Invalid ELF file",); - BX_CHECK( read_u8 (LE, elf_begin + 3, elf_end) == ELF_MAGIC[3], "Invalid ELF file",); - - BX_CHECK( read_u8 (LE, elf_begin + 4, elf_end) == ELF_64, "Unsupported ELF file",); 
- BX_CHECK( read_u8 (LE, elf_begin + 5, elf_end) == ELF_2_LE, "Unsupported ELF file",); - BX_CHECK( read_u8 (LE, elf_begin + 6, elf_end) == ELF_VERSION, "Unsupported ELF file",); - BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); - BX_CHECK( read_u8 (LE, elf_begin + 8, elf_end) == ELF_ABI_VERSION, "Unsupported ELF file",); - BX_CHECK( read_u16(LE, elf_begin + 16, elf_end) == ELF_RELOCATABLE, "Unsupported ELF file",); - BX_CHECK( read_u16(LE, elf_begin + 18, elf_end) == ELF_X86_64, "Unsupported ELF file",); - BX_CHECK( read_u32(LE, elf_begin + 20, elf_end) == ELF_VERSION, "Unsupported ELF file",); - - BX_LAX( read_u64(LE, elf_begin + 24, elf_end) == 0, "Invalid entry point"); - BX_LAX( read_u64(LE, elf_begin + 32, elf_end) == 0, "Invalid program header offset"); - BX_LAX( read_u32(LE, elf_begin + 48, elf_end) == 0, "Invalid flags"); - BX_LAX( read_u16(LE, elf_begin + 52, elf_end) == ELF_HEADER_SIZE, "Invalid ELF header size"); - BX_LAX( read_u16(LE, elf_begin + 54, elf_end) == 0, "Invalid program header size"); - BX_LAX( read_u16(LE, elf_begin + 56, elf_end) == 0, "Invalid num program headers"); - BX_LAX( read_u16(LE, elf_begin + 58, elf_end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size"); - - Buffer_Context buf = { - .begin = pool->dependencies_buffer, - .end = pool->dependencies_buffer + pool->obj_file_offsets[im_num], - .elf = { - .offset = pool->obj_file_offsets[elf_index], - .size = pool->obj_file_offsets[elf_index + 1] - pool->obj_file_offsets[elf_index], - }, - }; + // Calculate section offsets, relocate symbols + + i64 sec_index_global = 0; + + for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) { + Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); + elf_checks(buf); elf_dump(VERBOSE, buf); - Offset_Num section_headers = elf_section_headers(buf); - Offset_Size symbol_names = {0}; - Offset_Size symbols = {0}; + Offset_Num headers = elf_section_headers(buf); + Section_Header symtab = 
elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1); + + for (i64 sec_index = 0; sec_index < headers.num; ++sec_index, ++sec_index_global) { + pool->section_offsets[sec_index_global] = 0; - for (i64 sec_index = 0; sec_index < section_headers.num; ++sec_index) { Section_Header section = elf_section(buf, sec_index); - i64 offset = 0; - - if (section.alloc) { - if (section.exec) { - offset = text_size; - text_size += bx_align(section.data.size, X86_64_ALIGNMENT); - } else if (section.write && section.type == SEC_NOBITS) { - offset = bss_size; - bss_size += bx_align(section.data.size, X86_64_ALIGNMENT); - } else if (section.write) { - offset = data_size; - data_size += bx_align(section.data.size, X86_64_ALIGNMENT); - } else if (section.data.size > 0) { - offset = rodata_size; - rodata_size += bx_align(section.data.size, X86_64_ALIGNMENT); - } else - BX_LAX(0, "Unsupported section type"); + if (!section.alloc) + continue; + + if (section.exec) { + pool->section_offsets[sec_index_global] = text_size; + text_size += bx_align(section.data.size, X86_64_ALIGNMENT); + } else if (section.write && section.type == SEC_NOBITS) { + pool->section_offsets[sec_index_global] = bss_size; + bss_size += bx_align(section.data.size, X86_64_ALIGNMENT); + } else if (section.write) { + pool->section_offsets[sec_index_global] = data_size; + data_size += bx_align(section.data.size, X86_64_ALIGNMENT); + } else if (section.data.size > 0) { + pool->section_offsets[sec_index_global] = rodata_size; + rodata_size += bx_align(section.data.size, X86_64_ALIGNMENT); + } else { + BX_LAX(0, "Unsupported section type"); + continue; } - switch (section.type) { - case SEC_SYMTAB: - if (symbols.offset == 0) - symbols = section.data; - if (symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - - for (i64 sym_index = 0; sym_index < section.num_entries; ++sym_index) { - Symbol_Entry sym = elf_symbol(buf, section.data, 
symbol_names, (u16) sym_index); - - if (sym.name.size == 6 && bx_mem_eq(elf_name_from_offset(buf, sym.name), "printf", 6)) { - if (sym_printf.value.offset != 0) - BX_LAX(0, "Symbol redefinition"); - sym_printf = sym; - } - } + elf_relocate_symbols(buf, symtab, sec_index, base_address + data_offset + pool->section_offsets[sec_index_global]); + } + } - break; + // ============================================================== + // + // TODO Apply relocations - case SEC_RELA: { - if (symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - if (symbols.offset == 0) - symbols = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; + sec_index_global = 0; - Offset_Size dst = elf_find_related_data(buf, sec_index); + for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) { + Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); - for (i64 rela_index = 0; rela_index < section.num_entries; ++rela_index) { - Rela_Entry rela = elf_rela(buf, symbols, symbol_names, section.data, dst, rela_index); - } + elf_checks(buf); + elf_dump(VERBOSE, buf); - } break; + Offset_Num headers = elf_section_headers(buf); - case SEC_REL: { - if (symbol_names.offset == 0) - symbol_names = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - if (symbols.offset == 0) - symbols = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; + for (i64 sec_index = 0; sec_index < headers.num; ++sec_index, ++sec_index_global) + if (elf_section(buf, sec_index).alloc) + elf_apply_relocations(buf, sec_index, base_address + pool->section_offsets[sec_index_global]); + } - Offset_Size dst = elf_find_related_data(buf, sec_index); + // ============================================================== + // + // TODO Search symbols - for (i64 rel_index = 0; rel_index < section.num_entries; ++rel_index) { - Rel_Entry rel = elf_rel(buf, symbols, 
symbol_names, section.data, dst, rel_index); - } + for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) { + Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); - } break; + Offset_Num headers = elf_section_headers(buf); + Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - default:; - } + for (i64 sec_index = 0; sec_index < headers.num; ++sec_index) { + Section_Header section = elf_section(buf, sec_index); + + if (section.type == SEC_SYMTAB) + for (i64 sym_index = 0; sym_index < section.num_entries; ++sym_index) { + Symbol_Entry sym = elf_symbol(buf, section.data, strtab, (u16) sym_index); + + if (sym.name.size == 6 && bx_mem_eq(elf_name_from_offset(buf, sym.name), "printf", 6)) { + if (sym_printf.value.offset != 0) + BX_LAX(0, "Symbol redefinition"); + sym_printf = sym; + } + } } } // ============================================================== - - // TODO Write sections into the output buffer. // + // TODO Write sections into the output buffer. // ============================================================== // // Writing the ELF executable BX_LOG(VERBOSE, "Total size"); - BX_LOG(VERBOSE, ".text: %lld", text_size); - BX_LOG(VERBOSE, ".bss: %lld", bss_size); - BX_LOG(VERBOSE, ".data: %lld", data_size); - BX_LOG(VERBOSE, ".rodata: %lld", rodata_size); + BX_LOG(VERBOSE, ".text - %lld bytes", text_size); + BX_LOG(VERBOSE, ".bss - %lld bytes", bss_size); + BX_LOG(VERBOSE, ".data - %lld bytes", data_size); + BX_LOG(VERBOSE, ".rodata - %lld bytes", rodata_size); BX_LOG(VERBOSE, ""); - BX_LOG(VERBOSE, "Found printf: %d", sym_printf.value.offset); + BX_LOG(VERBOSE, "Found printf: %08llx", sym_printf.value.offset); #define WRITE(x, n) io_write( io_out, n, x, io_user_data ) #define WRITE_V(...) 
io_write( io_out, sizeof((u8[]) {__VA_ARGS__}), (u8[]) {__VA_ARGS__}, io_user_data ) @@ -2250,7 +2278,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data WRITE_4( 1 ); // type (PT_LOAD) WRITE_4( 5 ); // flags (PF_X | PF_R) - WRITE_8( code_offset ); + WRITE_8( data_offset ); WRITE_8( code_address ); // virtual address WRITE_8( code_address ); // physical address WRITE_8( code_size ); // size in file @@ -2260,7 +2288,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data // Code // - for (i64 i = code_offset - ELF_HEADER_SIZE - ELF_PROGRAM_HEADER_SIZE; i > 0; --i) + for (i64 i = data_offset - ELF_HEADER_SIZE - ELF_PROGRAM_HEADER_SIZE; i > 0; --i) WRITE_V( 0 ); WRITE( code, code_size ); @@ -2522,6 +2550,8 @@ Pool g_pool = { .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, .max_num_obj_files = MAX_NUM_OBJECT_FILES, .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, + .max_num_sections = MAX_NUM_SECTIONS, + .section_offsets = (i64[MAX_NUM_SECTIONS]) {0}, }; // Handy procedures |