summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitya Selivanov <automainint@guattari.tech>2024-07-24 07:18:48 +0200
committerMitya Selivanov <automainint@guattari.tech>2024-07-24 07:18:48 +0200
commite818389ae98b92ad1933549d80736d682f4dbf15 (patch)
treed52950beb69b7d05b5bcf3c4df7c9435e9f8543c
parent1924b3ee49a7268f24f5b39693d4e22eca418a24 (diff)
downloadbxgen-e818389ae98b92ad1933549d80736d682f4dbf15.zip
Add not found buffer
-rwxr-xr-xbxgen.c263
-rw-r--r--compile_flags.txt11
2 files changed, 212 insertions, 62 deletions
diff --git a/bxgen.c b/bxgen.c
index 743f75a..03ea30d 100755
--- a/bxgen.c
+++ b/bxgen.c
@@ -20,8 +20,8 @@
#/
#/ Inspirations
#/
-#/ - Cuik https://github.com/RealNeGate/Cuik
#/ - tinycc https://repo.or.cz/w/tinycc.git
+#/ - Cuik https://github.com/RealNeGate/Cuik
#/ - QBE https://c9x.me/compile/
#/
#/ To-Do list
@@ -33,6 +33,7 @@
#/ - String table for names and arrays
#/ - Proper prefixes for identifiers
#/ - Effective entity allocation
+#/ - Improve error handling
#/ - Implicit procedure prototypes
#/ - Implicit exit after ret from entry point
#/ - Static single-assignment
@@ -73,14 +74,17 @@
#/
SRC=${0##*./}
BIN=${SRC%.*}
-gcc \
- -Wno-old-style-declaration -Wno-missing-braces \
- -Wno-unused-variable -Wno-unused-but-set-variable \
- -Wno-unused-parameter \
- -Wall -Wextra -Werror -pedantic \
- -O0 -fsanitize=undefined,address,leak -mshstk \
- -o $BIN $SRC && \
- ./$BIN $@ && \
+gcc \
+ -Wall -Wextra -Werror -pedantic \
+ -Wno-old-style-declaration \
+ -Wno-missing-braces \
+ -Wno-unused-variable \
+ -Wno-unused-but-set-variable \
+ -Wno-unused-parameter \
+ -O3 \
+ -fsanitize=undefined,address,leak -mshstk \
+ -o $BIN $SRC && \
+ ./$BIN $@ && \
rm $BIN
exit $? # */
#endif
@@ -120,6 +124,8 @@ exit $? # */
//
// ================================================================
+#define BX_VERSION "dev"
+
typedef signed char i8;
typedef signed short i16;
typedef signed int i32;
@@ -154,8 +160,9 @@ enum {
MAX_NUM_OBJECT_FILES = 10 * 1024,
MAX_NUM_SECTIONS = 100 * 1024,
- MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB
- MAX_DEPENDENCIES_SIZE = 300 * 1024 * 1024, // 300 MB
+ MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB
+ MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB
+ MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB
MAX_PATH_SIZE = 10 * 1024,
MAX_LITERAL_SIZE = 400,
@@ -354,17 +361,20 @@ typedef struct {
i64 capacity;
Entity *entities;
- // Linker buffers
+ // TEMP Linker buffers
// TODO Use string table for buffers also.
- i64 max_obj_file_size;
+ i64 max_obj_file_size;
+ i64 max_dependencies_size;
+ i64 max_num_obj_files;
+ i64 max_num_sections;
+ i64 max_not_found_size;
+
u8 * obj_file_buffer;
- i64 max_dependencies_size;
u8 * dependencies_buffer;
- i64 max_num_obj_files;
i64 *obj_file_offsets;
- i64 max_num_sections;
i64 *section_offsets;
+ c8 * not_found_buffer;
} Pool;
// ================================================================
@@ -377,11 +387,22 @@ typedef struct {
extern "C" {
#endif
-// Hooks. Shoud be implemented on the user side
+// ================================================================
+//
+// Hooks
+//
+// NOTE
+// Should be implemented on the user side.
+// See: `* Helper procedures`
+//
void bx_log(i32 log_level, u32 line, c8 *file, c8 *format, ...);
void bx_assert(b8 condition, c8 *message, u32 line, c8 *file);
void io_dispatch(i16 op, i64 *id, i64 *size, void *data, void *user_data);
+// ================================================================
+//
+// Main API
+//
i64 pool_add(Pool *pool, Entity data);
void pool_remove(Pool *pool, i64 entity, i16 type);
@@ -419,7 +440,9 @@ i64 io_write(i64 f, i64 size, void *data, void *user_data);
void io_chmod_exe(i64 f, void *user_data);
// ================================================================
-
+//
+// Helpers API
+//
#ifndef DISABLE_HELPERS
i64 n_i64(i64 value);
i64 n_call(i16 convention, i64 target_proc, i64 num_args, Var *args);
@@ -435,6 +458,7 @@ void l_code(i64 unit, i64 link_unit);
void l_object(i64 unit, c8 *object_library);
void l_static(i64 unit, c8 *static_library);
#endif
+// ================================================================
#ifdef __cplusplus
}
@@ -508,6 +532,14 @@ i64 bx_align(i64 x, i64 a) {
#define BX_TRACE BX_LOG(TRACE, "")
+void bx_mem_set(void *dst, u8 val, i64 size) { // Store `val` into each of the first `size` bytes starting at `dst`.
+ BX_CHECK(dst != NULL, "Invalid arguments",); // NOTE(review): BX_CHECK is defined outside this hunk; presumably it reports and returns when the condition is false - confirm.
+ BX_CHECK(size > 0, "Invalid size",); // A zero or negative size fails this check (it is not treated as a silent no-op).
+
+ for (i64 i = 0; i < size; ++i) // Plain byte-by-byte fill through a u8 pointer.
+ ((u8 *)dst)[i] = val;
+}
+
void bx_mem_cpy(void *dst, void *src, i64 size) {
BX_CHECK(dst != NULL, "Invalid arguments",);
BX_CHECK(src != NULL, "Invalid arguments",);
@@ -580,12 +612,28 @@ c8 *bx_find_str(c8 *s, c8 *s_end, c8 *sub, c8 *sub_end) {
return NULL;
}
-i64 bx_i64_from_str(c8 *s, c8 *s_end) {
+c8 *bx_find_str_in_table(c8 *buf, c8 *buf_end, c8 *sub, c8 *sub_end) { // Search the table [buf, buf_end) for an entry equal to [sub, sub_end); returns a pointer to that entry, or NULL.
+ BX_CHECK(buf != NULL, "Invalid arguments", NULL);
+ BX_CHECK(buf_end != NULL, "Invalid arguments", NULL);
+ BX_CHECK(sub != NULL, "Invalid arguments", NULL);
+ BX_CHECK(sub_end != NULL, "Invalid arguments", NULL);
+
+ while (buf < buf_end) { // Visit the entries one after another.
+ i64 len = bx_str_len(buf, buf_end); // Length of the current entry as reported by bx_str_len (defined outside this hunk).
+ if (sub_end - sub == len && bx_mem_eq(buf, sub, len)) // Lengths are compared first, so a mere prefix of an entry does not match.
+ return buf;
+ buf += len + 1; // Skip the entry plus one separator byte - presumably its NUL terminator; confirm against bx_str_len.
+ }
+
+ return NULL; // No entry matched.
+}
+
+u64 bx_u64_from_dec_str(c8 *s, c8 *s_end) {
BX_CHECK(s != NULL && s_end != NULL, "Invalid arguments", 0);
BX_CHECK(s < s_end, "Buffer overflow", 0);
BX_CHECK(*s >= '0' && *s <= '9', "Parsing failed", 0);
- i64 x = 0;
+ u64 x = 0;
for (; s < s_end && *s >= '0' && *s <= '9'; ++s)
x = (x * 10) + (*s - '0');
@@ -969,11 +1017,11 @@ enum {
};
typedef struct {
- unsigned little:1;
+ unsigned first:1;
} Bits;
u32 host_bit_order() {
- if ((*(Bits *) &(u8) { 1 }).little == 1)
+ if ((*(Bits *) &(u8) { 1 }).first == 1)
return BIT_LE;
return BIT_BE;
}
@@ -996,6 +1044,16 @@ u32 host_dword_order() {
return DWORD_BE;
}
+void check_f32_format() { // Host floating-point sanity check; read_f32 and write_f32 call it before converting.
+ // FIXME NOTE(review): the probe below is an f64 constant even though the name says f32 - likely what this FIXME refers to; confirm.
+ if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull) // Low 32 bits of the constant's bit pattern equal the marker 0x40301fcb.
+ return;
+ if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffff00000000ull) == 0x40301fcb00000000ull) // High 32 bits equal the same marker.
+ return;
+
+ BX_FAIL("Unknown host floating-point number format",); // Reached only when neither half matched.
+}
+
u32 host_f64_dword_order() {
if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull)
return host_dword_order() == DWORD_LE ? F64_DWORD_LE : F64_DWORD_BE;
@@ -1173,7 +1231,7 @@ i64 read_i64(u32 ordering, void *v, void *v_end) {
}
f32 read_f32(u32 ordering, void *v, void *v_end) {
- host_f64_dword_order(); // FIXME
+ check_f32_format();
return *(f32 *) &(u32) { read_u32(ordering, v, v_end) };
}
@@ -1200,7 +1258,7 @@ void write_i64(u32 ordering, i64 x, void *v, void *v_end) {
}
void write_f32(u32 ordering, f32 x, void *v, void *v_end) {
- host_f64_dword_order(); // FIXME
+ check_f32_format();
void *p = &x;
write_u32(ordering, *(u32 *) p, v, v_end);
}
@@ -1227,8 +1285,8 @@ void write_f64(u32 ordering, f64 x, void *v, void *v_end) {
//
// Docs and helpful materials
//
-// AR https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5
-// ELF https://man7.org/linux/man-pages/man5/elf.5.html
+// AR https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5
+// ELF https://man7.org/linux/man-pages/man5/elf.5.html
//
// Relocation types
// https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/
@@ -1887,7 +1945,7 @@ void elf_checks(Buffer_Context b) {
BX_CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",);
BX_CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",);
BX_CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",);
- BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",);
+ BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",);
BX_CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",);
BX_CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",);
BX_CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",);
@@ -2055,6 +2113,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
BX_CHECK(pool->obj_file_buffer != NULL, "No object file buffer",);
BX_CHECK(pool->dependencies_buffer != NULL, "No dependencies buffer",);
BX_CHECK(pool->obj_file_offsets != NULL, "No object file offsets buffer",);
+ BX_CHECK(pool->not_found_buffer != NULL, "No not found buffer",);
// ==============================================================
//
@@ -2089,6 +2148,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
i64 num_obj_files = 0;
i64 obj_files_size = 0;
+ i64 not_found_size = 0;
// Read all dependency files into the memory
//
@@ -2100,9 +2160,10 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
if (id == UNDEFINED)
continue;
Unit *l = &pool->entities[id].unit;
- BX_CHECK(pool->entities[id].is_enabled, "Internal",);
- BX_CHECK(l->type == UNIT_LIBRARY_STATIC, "Link type not supported",);
- BX_CHECK(l->name_size > 0 && l->name_size <= MAX_NAME_SIZE, "Link name too big",);
+ BX_CHECK(pool->entities[id].is_enabled, "Internal",);
+ BX_CHECK(l->type == UNIT_LIBRARY_STATIC, "Link type not supported",);
+ BX_CHECK(l->name_size > 0, "No link name",);
+ BX_CHECK(l->name_size <= MAX_NAME_SIZE, "Link name too big",);
i64 f = io_open_read(l->name_size, l->name, io_user_data);
io_seek(f, 0, IO_SEEK_END, io_user_data);
@@ -2133,7 +2194,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
u8 *f_end = f_begin + 58;
u8 *f_data = f_begin + 60;
- i64 size = bx_i64_from_str((c8 *) f_size, (c8 *) f_size + 10);
+ i64 size = (i64) bx_u64_from_dec_str((c8 *) f_size, (c8 *) f_size + 10);
size = bx_align(size, 2);
@@ -2170,7 +2231,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index);
elf_checks(buf);
- elf_dump(VERBOSE, buf);
+ //elf_dump(VERBOSE, buf);
Offset_Num headers = elf_section_headers(buf);
@@ -2222,7 +2283,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
Symbol_Entry sym = elf_symbol(buf, tab.data, strtab, sym_index);
c8 * sym_name = elf_name_from_offset(buf, sym.name);
- if (sym.section == 0)
+ if (sym.section == 0) // undefined symbol
continue;
i64 sec_index = sec_index_global + sym.section - 1;
@@ -2239,7 +2300,57 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
// ==========================================================
//
- // Resolve undefined symbols
+ // TODO Add runtime library symbols
+ //
+ // _DYNAMIC
+ // _GLOBAL_OFFSET_TABLE_
+ //
+ // _Unwind_Resume
+ // _Unwind_Backtrace
+ // _Unwind_ForcedUnwind
+ // _Unwind_GetIP
+ // _Unwind_GetCFA
+ //
+ // _init
+ // _end
+ // _fini
+ // _dl_rtld_map
+ // __ehdr_start
+ // __pthread_initialize_minimal
+ // __init_array_start
+ // __init_array_end
+ // __fini_array_start
+ // __fini_array_end
+ // __rela_iplt_start
+ // __rela_iplt_end
+ // __preinit_array_start
+ // __preinit_array_end
+ // __start___libc_atexit
+ // __stop___libc_atexit
+ // __stop___libc_IO_vtables
+ // __start___libc_IO_vtables
+ // __start___libc_subfreeres
+ // __stop___libc_subfreeres
+ // __start___libc_freeres_ptrs
+ // __stop___libc_freeres_ptrs
+ // __gcc_personality_v0
+ //
+ // __addtf3
+ // __subtf3
+ // __multf3
+ // __divtf3
+ // __eqtf2
+ // __lttf2
+ // __letf2
+ // __gttf2
+ // __getf2
+ // __unordtf2
+
+ // ==========================================================
+ //
+ // FIXME PERF Resolve undefined symbols
+
+ BX_LOG(VERBOSE, "Resolve undefined symbols");
sec_index_global = 0;
@@ -2269,12 +2380,14 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
i64 offset = 0;
b8 found = 0;
- for (i64 search_index = 0; search_index < num_obj_files; ++search_index) {
- Buffer_Context search_buf = elf_buffer_context(pool, num_obj_files, search_index);
-
- Offset_Size search_strtab = elf_find_section_by_name(search_buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
+ // Check if the symbol is already saved as not found
+ if (bx_find_str_in_table(pool->not_found_buffer, pool->not_found_buffer + not_found_size, sym_name, sym_name + sym.name.size) != NULL)
+ continue;
- i64 search_num_sections = elf_section_headers(search_buf).num;
+ for (i64 search_index = 0; search_index < num_obj_files; ++search_index) {
+ Buffer_Context search_buf = elf_buffer_context(pool, num_obj_files, search_index);
+ Offset_Size search_strtab = elf_find_section_by_name(search_buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
+ i64 search_num_sections = elf_section_headers(search_buf).num;
for (i64 search_sec = 1; search_sec < search_num_sections; ++search_sec) {
Section_Header search_symtab = elf_section(search_buf, search_sec);
@@ -2282,7 +2395,7 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
continue;
for (i64 k = 1; k < search_symtab.num_entries; ++k) {
- Symbol_Entry search_sym = elf_symbol(search_buf, search_symtab.data, search_strtab, k);
+ Symbol_Entry search_sym = elf_symbol(search_buf, search_symtab.data, search_strtab, k);
if (search_sym.bind == BIND_LOCAL ||
search_sym.section == 0 ||
@@ -2293,26 +2406,29 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
if (!bx_mem_eq(sym_name, search_name, sym.name.size))
continue;
-
+
sec_index += search_sym.section - 1;
offset = search_sym.value.offset;
found = 1;
break;
}
- if (found == 1)
+ if (found)
break;
}
- if (found == 1)
+ if (found)
break;
-
+
sec_index += elf_section_headers(search_buf).num - 1;
}
- if (!found)
- BX_LOG(ERROR, "Symbol not found: %s", sym_name);
- else {
+ if (!found) {
+ BX_CHECK(not_found_size + sym.name.size + 1 < pool->max_not_found_size, "Out of memory",);
+
+ bx_mem_cpy(pool->not_found_buffer + not_found_size, sym_name, sym.name.size);
+ not_found_size += sym.name.size + 1;
+ } else {
u8 *begin = buf.begin + tab.data.offset + sym_index * ELF_SYMBOL_ENTRY_SIZE;
u8 *end = begin + tab.data.size;
@@ -2345,11 +2461,9 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
// TODO Search symbols
for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) {
- Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index);
-
- Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
-
- i64 num_sections = elf_section_headers(buf).num;
+ Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index);
+ Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data;
+ i64 num_sections = elf_section_headers(buf).num;
for (i64 sec_index = 1; sec_index < num_sections; ++sec_index) {
Section_Header section = elf_section(buf, sec_index);
@@ -2365,21 +2479,25 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
continue;
if (bx_mem_eq(elf_name_from_offset(buf, sym.name), "printf", 6)) {
- if (sym_printf.section != 0)
- BX_LAX(0, "Symbol redefinition");
sym_printf = sym;
+ break;
}
}
+
+ if (sym_printf.section != 0)
+ break;
}
}
// ==============================================================
//
- // TODO Write sections into the output buffer.
+ // TODO Write sections into the output buffer
// ==============================================================
//
// Writing the ELF executable
+ //
+ // TODO Write into memory.
BX_LOG(VERBOSE, "Total size");
BX_LOG(VERBOSE, ".text - %lld bytes", text_size);
@@ -2387,11 +2505,20 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
BX_LOG(VERBOSE, ".data - %lld bytes", data_size);
BX_LOG(VERBOSE, ".rodata - %lld bytes", rodata_size);
- BX_LOG(VERBOSE, "");
+ if (not_found_size > 0) {
+ BX_LOG(ERROR, "Symbols not found");
+ BX_LOG(VERBOSE, "");
+ c8 *s_end = pool->not_found_buffer + not_found_size;
+ for (c8 *s = pool->not_found_buffer; s < s_end; s += bx_str_len(s, s_end) + 1)
+ BX_LOG(VERBOSE, "%s", s);
+ BX_LOG(VERBOSE, "");
+ }
BX_CHECK(sym_printf.section != 0, "Symbol not found: printf",);
BX_LOG(VERBOSE, "Found printf: %08llx", sym_printf.value.offset);
+ BX_LOG(VERBOSE, "Writing ELF x86_64 executable");
+
#define WRITE(x, n) io_write( io_out, n, x, io_user_data )
#define WRITE_V(...) io_write( io_out, sizeof((u8[]) {__VA_ARGS__}), (u8[]) {__VA_ARGS__}, io_user_data )
#define WRITE_DUP(x, n) io_write( io_out, n, (u8[n]) { 0 }, io_user_data )
@@ -2452,6 +2579,16 @@ void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data
#undef WRITE_64
#undef WRITE
+
+ // ==============================================================
+ //
+ // Cleanup
+
+ bx_mem_set(pool->obj_file_buffer, 0, pool->max_obj_file_size);
+ bx_mem_set(pool->dependencies_buffer, 0, pool->max_dependencies_size);
+ bx_mem_set(pool->obj_file_offsets, 0, pool->max_num_obj_files * sizeof *pool->obj_file_offsets);
+ bx_mem_set(pool->section_offsets, 0, pool->max_num_sections * sizeof *pool->section_offsets);
+ bx_mem_set(pool->not_found_buffer, 0, pool->max_not_found_size);
}
i64 io_open_read(i64 name_size, c8 *name, void *user_data) {
@@ -2698,13 +2835,16 @@ Pool g_pool = {
.entities = (Entity[MAX_NUM_ENTITIES]) {0},
.max_obj_file_size = MAX_OBJECT_FILE_SIZE,
- .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0},
.max_dependencies_size = MAX_DEPENDENCIES_SIZE,
- .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0},
.max_num_obj_files = MAX_NUM_OBJECT_FILES,
- .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0},
.max_num_sections = MAX_NUM_SECTIONS,
- .section_offsets = (i64[MAX_NUM_SECTIONS]) {0},
+ .max_not_found_size = MAX_NOT_FOUND_SIZE,
+
+ .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0},
+ .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0},
+ .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0},
+ .section_offsets = (i64[MAX_NUM_SECTIONS]) {0},
+ .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0},
};
// Handy procedures
@@ -2787,6 +2927,8 @@ int main(int argc, char **argv) {
(void) argc;
(void) argv;
+ BX_LOG(INFO, "bxgen " BX_VERSION);
+
// Add the `main` procedure.
i64 main = p_new("main");
@@ -2822,7 +2964,6 @@ int main(int argc, char **argv) {
// l_static(u, "libtest.a");
// Write the compilation unit into an executable file.
- BX_LOG(VERBOSE, "Writing ELF x86_64 executable...");
u_elf_x86_64(u, "test_foo");
BX_CHECK(HOST == HOST_Linux, "Host system is not compatible", -1);
diff --git a/compile_flags.txt b/compile_flags.txt
index 1c01682..0bf6fd9 100644
--- a/compile_flags.txt
+++ b/compile_flags.txt
@@ -1 +1,10 @@
--Wall -Wextra -Werror -pedantic
+-Wall
+-Wextra
+-Werror
+-pedantic
+-Wno-empty-translation-unit
+-Wno-old-style-declaration
+-Wno-missing-braces
+-Wno-unused-variable
+-Wno-unused-but-set-variable
+-Wno-unused-parameter