From e818389ae98b92ad1933549d80736d682f4dbf15 Mon Sep 17 00:00:00 2001 From: Mitya Selivanov Date: Wed, 24 Jul 2024 07:18:48 +0200 Subject: Add not found buffer --- bxgen.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 202 insertions(+), 61 deletions(-) (limited to 'bxgen.c') diff --git a/bxgen.c b/bxgen.c index 743f75a..03ea30d 100755 --- a/bxgen.c +++ b/bxgen.c @@ -20,8 +20,8 @@ #/ #/ Inspirations #/ -#/ - Cuik https://github.com/RealNeGate/Cuik #/ - tinycc https://repo.or.cz/w/tinycc.git +#/ - Cuik https://github.com/RealNeGate/Cuik #/ - QBE https://c9x.me/compile/ #/ #/ To-Do list @@ -33,6 +33,7 @@ #/ - String table for names and arrays #/ - Proper prefixes for identifiers #/ - Effective entity allocation +#/ - Improve error handling #/ - Implicit procedure prototypes #/ - Implicit exit after ret from entry point #/ - Static single-assignment @@ -73,14 +74,17 @@ #/ SRC=${0##*./} BIN=${SRC%.*} -gcc \ - -Wno-old-style-declaration -Wno-missing-braces \ - -Wno-unused-variable -Wno-unused-but-set-variable \ - -Wno-unused-parameter \ - -Wall -Wextra -Werror -pedantic \ - -O0 -fsanitize=undefined,address,leak -mshstk \ - -o $BIN $SRC && \ - ./$BIN $@ && \ +gcc \ + -Wall -Wextra -Werror -pedantic \ + -Wno-old-style-declaration \ + -Wno-missing-braces \ + -Wno-unused-variable \ + -Wno-unused-but-set-variable \ + -Wno-unused-parameter \ + -O3 \ + -fsanitize=undefined,address,leak -mshstk \ + -o $BIN $SRC && \ + ./$BIN $@ && \ rm $BIN exit $? # */ #endif @@ -120,6 +124,8 @@ exit $? 
# */ // // ================================================================ +#define BX_VERSION "dev" + typedef signed char i8; typedef signed short i16; typedef signed int i32; @@ -154,8 +160,9 @@ enum { MAX_NUM_OBJECT_FILES = 10 * 1024, MAX_NUM_SECTIONS = 100 * 1024, - MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB - MAX_DEPENDENCIES_SIZE = 300 * 1024 * 1024, // 300 MB + MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB + MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB + MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB MAX_PATH_SIZE = 10 * 1024, MAX_LITERAL_SIZE = 400, @@ -354,17 +361,20 @@ typedef struct { i64 capacity; Entity *entities; - // Linker buffers + // TEMP Linker buffers // TODO Use string table for buffers also. - i64 max_obj_file_size; + i64 max_obj_file_size; + i64 max_dependencies_size; + i64 max_num_obj_files; + i64 max_num_sections; + i64 max_not_found_size; + u8 * obj_file_buffer; - i64 max_dependencies_size; u8 * dependencies_buffer; - i64 max_num_obj_files; i64 *obj_file_offsets; - i64 max_num_sections; i64 *section_offsets; + c8 * not_found_buffer; } Pool; // ================================================================ @@ -377,11 +387,22 @@ typedef struct { extern "C" { #endif -// Hooks. Shoud be implemented on the user side +// ================================================================ +// +// Hooks +// +// NOTE +// Should be implemented on the user side. 
+// See: `* Helper procedures` +// void bx_log(i32 log_level, u32 line, c8 *file, c8 *format, ...); void bx_assert(b8 condition, c8 *message, u32 line, c8 *file); void io_dispatch(i16 op, i64 *id, i64 *size, void *data, void *user_data); +// ================================================================ +// +// Main API +// i64 pool_add(Pool *pool, Entity data); void pool_remove(Pool *pool, i64 entity, i16 type); @@ -419,7 +440,9 @@ i64 io_write(i64 f, i64 size, void *data, void *user_data); void io_chmod_exe(i64 f, void *user_data); // ================================================================ - +// +// Helpers API +// #ifndef DISABLE_HELPERS i64 n_i64(i64 value); i64 n_call(i16 convention, i64 target_proc, i64 num_args, Var *args); @@ -435,6 +458,7 @@ void l_code(i64 unit, i64 link_unit); void l_object(i64 unit, c8 *object_library); void l_static(i64 unit, c8 *static_library); #endif +// ================================================================ #ifdef __cplusplus } @@ -508,6 +532,14 @@ i64 bx_align(i64 x, i64 a) { #define BX_TRACE BX_LOG(TRACE, "") +void bx_mem_set(void *dst, u8 val, i64 size) { + BX_CHECK(dst != NULL, "Invalid arguments",); + BX_CHECK(size > 0, "Invalid size",); + + for (i64 i = 0; i < size; ++i) + ((u8 *)dst)[i] = val; +} + void bx_mem_cpy(void *dst, void *src, i64 size) { BX_CHECK(dst != NULL, "Invalid arguments",); BX_CHECK(src != NULL, "Invalid arguments",); @@ -580,12 +612,28 @@ c8 *bx_find_str(c8 *s, c8 *s_end, c8 *sub, c8 *sub_end) { return NULL; } -i64 bx_i64_from_str(c8 *s, c8 *s_end) { +c8 *bx_find_str_in_table(c8 *buf, c8 *buf_end, c8 *sub, c8 *sub_end) { + BX_CHECK(buf != NULL, "Invalid arguments", NULL); + BX_CHECK(buf_end != NULL, "Invalid arguments", NULL); + BX_CHECK(sub != NULL, "Invalid arguments", NULL); + BX_CHECK(sub_end != NULL, "Invalid arguments", NULL); + + while (buf < buf_end) { + i64 len = bx_str_len(buf, buf_end); + if (sub_end - sub == len && bx_mem_eq(buf, sub, len)) + return buf; + buf += len + 1; + 
} + + return NULL; +} + +u64 bx_u64_from_dec_str(c8 *s, c8 *s_end) { BX_CHECK(s != NULL && s_end != NULL, "Invalid arguments", 0); BX_CHECK(s < s_end, "Buffer overflow", 0); BX_CHECK(*s >= '0' && *s <= '9', "Parsing failed", 0); - i64 x = 0; + u64 x = 0; for (; s < s_end && *s >= '0' && *s <= '9'; ++s) x = (x * 10) + (*s - '0'); @@ -969,11 +1017,11 @@ enum { }; typedef struct { - unsigned little:1; + unsigned first:1; } Bits; u32 host_bit_order() { - if ((*(Bits *) &(u8) { 1 }).little == 1) + if ((*(Bits *) &(u8) { 1 }).first == 1) return BIT_LE; return BIT_BE; } @@ -996,6 +1044,16 @@ u32 host_dword_order() { return DWORD_BE; } +void check_f32_format() { + // FIXME + if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull) + return; + if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffff00000000ull) == 0x40301fcb00000000ull) + return; + + BX_FAIL("Unknown host floating-point number format",); +} + u32 host_f64_dword_order() { if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull) return host_dword_order() == DWORD_LE ? 
F64_DWORD_LE : F64_DWORD_BE; @@ -1173,7 +1231,7 @@ i64 read_i64(u32 ordering, void *v, void *v_end) { } f32 read_f32(u32 ordering, void *v, void *v_end) { - host_f64_dword_order(); // FIXME + check_f32_format(); return *(f32 *) &(u32) { read_u32(ordering, v, v_end) }; } @@ -1200,7 +1258,7 @@ void write_i64(u32 ordering, i64 x, void *v, void *v_end) { } void write_f32(u32 ordering, f32 x, void *v, void *v_end) { - host_f64_dword_order(); // FIXME + check_f32_format(); void *p = &x; write_u32(ordering, *(u32 *) p, v, v_end); } @@ -1227,8 +1285,8 @@ void write_f64(u32 ordering, f64 x, void *v, void *v_end) { // // Docs and helpful materials // -// AR https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5 -// ELF https://man7.org/linux/man-pages/man5/elf.5.html +// AR https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5 +// ELF https://man7.org/linux/man-pages/man5/elf.5.html // // Relocation types // https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/ @@ -1887,7 +1945,7 @@ void elf_checks(Buffer_Context b) { BX_CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",); - BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); + BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",); BX_CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",); BX_CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",); @@ -2055,6 +2113,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data BX_CHECK(pool->obj_file_buffer != NULL, "No object file buffer",); BX_CHECK(pool->dependencies_buffer != NULL, "No dependencies buffer",); BX_CHECK(pool->obj_file_offsets != NULL, 
"No object file offsets buffer",); + BX_CHECK(pool->not_found_buffer != NULL, "No not found buffer",); // ============================================================== // @@ -2089,6 +2148,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data i64 num_obj_files = 0; i64 obj_files_size = 0; + i64 not_found_size = 0; // Read all dependency files into the memory // @@ -2100,9 +2160,10 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data if (id == UNDEFINED) continue; Unit *l = &pool->entities[id].unit; - BX_CHECK(pool->entities[id].is_enabled, "Internal",); - BX_CHECK(l->type == UNIT_LIBRARY_STATIC, "Link type not supported",); - BX_CHECK(l->name_size > 0 && l->name_size <= MAX_NAME_SIZE, "Link name too big",); + BX_CHECK(pool->entities[id].is_enabled, "Internal",); + BX_CHECK(l->type == UNIT_LIBRARY_STATIC, "Link type not supported",); + BX_CHECK(l->name_size > 0, "No link name",); + BX_CHECK(l->name_size <= MAX_NAME_SIZE, "Link name too big",); i64 f = io_open_read(l->name_size, l->name, io_user_data); io_seek(f, 0, IO_SEEK_END, io_user_data); @@ -2133,7 +2194,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data u8 *f_end = f_begin + 58; u8 *f_data = f_begin + 60; - i64 size = bx_i64_from_str((c8 *) f_size, (c8 *) f_size + 10); + i64 size = (i64) bx_u64_from_dec_str((c8 *) f_size, (c8 *) f_size + 10); size = bx_align(size, 2); @@ -2170,7 +2231,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); elf_checks(buf); - elf_dump(VERBOSE, buf); + //elf_dump(VERBOSE, buf); Offset_Num headers = elf_section_headers(buf); @@ -2222,7 +2283,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data Symbol_Entry sym = elf_symbol(buf, tab.data, strtab, sym_index); c8 * sym_name = elf_name_from_offset(buf, sym.name); - if (sym.section == 0) + if (sym.section 
== 0) // undefined symbol continue; i64 sec_index = sec_index_global + sym.section - 1; @@ -2239,7 +2300,57 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data // ========================================================== // - // Resolve undefined symbols + // TODO Add runtime library symbols + // + // _DYNAMIC + // _GLOBAL_OFFSET_TABLE_ + // + // _Unwind_Resume + // _Unwind_Backtrace + // _Unwind_ForcedUnwind + // _Unwind_GetIP + // _Unwind_GetCFA + // + // _init + // _end + // _fini + // _dl_rtld_map + // __ehdr_start + // __pthread_initialize_minimal + // __init_array_start + // __init_array_end + // __fini_array_start + // __fini_array_end + // __rela_iplt_start + // __rela_iplt_end + // __preinit_array_start + // __preinit_array_end + // __start___libc_atexit + // __stop___libc_atexit + // __stop___libc_IO_vtables + // __start___libc_IO_vtables + // __start___libc_subfreeres + // __stop___libc_subfreeres + // __start___libc_freeres_ptrs + // __stop___libc_freeres_ptrs + // __gcc_personality_v0 + // + // __addtf3 + // __subtf3 + // __multf3 + // __divtf3 + // __eqtf2 + // __lttf2 + // __letf2 + // __gttf2 + // __getf2 + // __unordtf2 + + // ========================================================== + // + // FIXME PERF Resolve undefined symbols + + BX_LOG(VERBOSE, "Resolve undefined symbols"); sec_index_global = 0; @@ -2269,12 +2380,14 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data i64 offset = 0; b8 found = 0; - for (i64 search_index = 0; search_index < num_obj_files; ++search_index) { - Buffer_Context search_buf = elf_buffer_context(pool, num_obj_files, search_index); - - Offset_Size search_strtab = elf_find_section_by_name(search_buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + // Check if the symbol is already saved as not found + if (bx_find_str_in_table(pool->not_found_buffer, pool->not_found_buffer + not_found_size, sym_name, sym_name + sym.name.size) != NULL) + continue; - i64 
search_num_sections = elf_section_headers(search_buf).num; + for (i64 search_index = 0; search_index < num_obj_files; ++search_index) { + Buffer_Context search_buf = elf_buffer_context(pool, num_obj_files, search_index); + Offset_Size search_strtab = elf_find_section_by_name(search_buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + i64 search_num_sections = elf_section_headers(search_buf).num; for (i64 search_sec = 1; search_sec < search_num_sections; ++search_sec) { Section_Header search_symtab = elf_section(search_buf, search_sec); @@ -2282,7 +2395,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data continue; for (i64 k = 1; k < search_symtab.num_entries; ++k) { - Symbol_Entry search_sym = elf_symbol(search_buf, search_symtab.data, search_strtab, k); + Symbol_Entry search_sym = elf_symbol(search_buf, search_symtab.data, search_strtab, k); if (search_sym.bind == BIND_LOCAL || search_sym.section == 0 || @@ -2293,26 +2406,29 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data if (!bx_mem_eq(sym_name, search_name, sym.name.size)) continue; - + sec_index += search_sym.section - 1; offset = search_sym.value.offset; found = 1; break; } - if (found == 1) + if (found) break; } - if (found == 1) + if (found) break; - + sec_index += elf_section_headers(search_buf).num - 1; } - if (!found) - BX_LOG(ERROR, "Symbol not found: %s", sym_name); - else { + if (!found) { + BX_CHECK(not_found_size + sym.name.size + 1 < pool->max_not_found_size, "Out of memory",); + + bx_mem_cpy(pool->not_found_buffer + not_found_size, sym_name, sym.name.size); + not_found_size += sym.name.size + 1; + } else { u8 *begin = buf.begin + tab.data.offset + sym_index * ELF_SYMBOL_ENTRY_SIZE; u8 *end = begin + tab.data.size; @@ -2345,11 +2461,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data // TODO Search symbols for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) { - Buffer_Context buf = 
elf_buffer_context(pool, num_obj_files, elf_index); - - Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; - - i64 num_sections = elf_section_headers(buf).num; + Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); + Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; + i64 num_sections = elf_section_headers(buf).num; for (i64 sec_index = 1; sec_index < num_sections; ++sec_index) { Section_Header section = elf_section(buf, sec_index); @@ -2365,21 +2479,25 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data continue; if (bx_mem_eq(elf_name_from_offset(buf, sym.name), "printf", 6)) { - if (sym_printf.section != 0) - BX_LAX(0, "Symbol redefinition"); sym_printf = sym; + break; } } + + if (sym_printf.section != 0) + break; } } // ============================================================== // - // TODO Write sections into the output buffer. + // TODO Write sections into the output buffer // ============================================================== // // Writing the ELF executable + // + // TODO Write into memory. 
BX_LOG(VERBOSE, "Total size"); BX_LOG(VERBOSE, ".text - %lld bytes", text_size); @@ -2387,11 +2505,20 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data BX_LOG(VERBOSE, ".data - %lld bytes", data_size); BX_LOG(VERBOSE, ".rodata - %lld bytes", rodata_size); - BX_LOG(VERBOSE, ""); + if (not_found_size > 0) { + BX_LOG(ERROR, "Symbols not found"); + BX_LOG(VERBOSE, ""); + c8 *s_end = pool->not_found_buffer + not_found_size; + for (c8 *s = pool->not_found_buffer; s < s_end; s += bx_str_len(s, s_end) + 1) + BX_LOG(VERBOSE, "%s", s); + BX_LOG(VERBOSE, ""); + } BX_CHECK(sym_printf.section != 0, "Symbol not found: printf",); BX_LOG(VERBOSE, "Found printf: %08llx", sym_printf.value.offset); + BX_LOG(VERBOSE, "Writing ELF x86_64 executable"); + #define WRITE(x, n) io_write( io_out, n, x, io_user_data ) #define WRITE_V(...) io_write( io_out, sizeof((u8[]) {__VA_ARGS__}), (u8[]) {__VA_ARGS__}, io_user_data ) #define WRITE_DUP(x, n) io_write( io_out, n, (u8[n]) { 0 }, io_user_data ) @@ -2452,6 +2579,16 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data #undef WRITE_64 #undef WRITE + + // ============================================================== + // + // Cleanup + + bx_mem_set(pool->obj_file_buffer, 0, pool->max_obj_file_size); + bx_mem_set(pool->dependencies_buffer, 0, pool->max_dependencies_size); + bx_mem_set(pool->obj_file_offsets, 0, pool->max_num_obj_files * sizeof *pool->obj_file_offsets); + bx_mem_set(pool->section_offsets, 0, pool->max_num_sections * sizeof *pool->section_offsets); + bx_mem_set(pool->not_found_buffer, 0, pool->max_not_found_size); } i64 io_open_read(i64 name_size, c8 *name, void *user_data) { @@ -2698,13 +2835,16 @@ Pool g_pool = { .entities = (Entity[MAX_NUM_ENTITIES]) {0}, .max_obj_file_size = MAX_OBJECT_FILE_SIZE, - .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0}, .max_dependencies_size = MAX_DEPENDENCIES_SIZE, - .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, 
.max_num_obj_files = MAX_NUM_OBJECT_FILES, - .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, .max_num_sections = MAX_NUM_SECTIONS, - .section_offsets = (i64[MAX_NUM_SECTIONS]) {0}, + .max_not_found_size = MAX_NOT_FOUND_SIZE, + + .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0}, + .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, + .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, + .section_offsets = (i64[MAX_NUM_SECTIONS]) {0}, + .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0}, }; // Handy procedures @@ -2787,6 +2927,8 @@ int main(int argc, char **argv) { (void) argc; (void) argv; + BX_LOG(INFO, "bxgen " BX_VERSION); + // Add the `main` procedure. i64 main = p_new("main"); @@ -2822,7 +2964,6 @@ int main(int argc, char **argv) { // l_static(u, "libtest.a"); // Write the compilation unit into an executable file. - BX_LOG(VERBOSE, "Writing ELF x86_64 executable..."); u_elf_x86_64(u, "test_foo"); BX_CHECK(HOST == HOST_Linux, "Host system is not compatible", -1); -- cgit v1.2.3