#if 0 /* #/ ================================================================ #/ #/ bxgen.c #/ #/ Binary executable code generation and linking. #/ Compiler backend. #/ #/ ---------------------------------------------------------------- #/ #/ Qualities #/ #/ - Single source file (for now) #/ - Simple and flexible API #/ - No external dependencies #/ - No configuration required #/ - No dynamic memory management #/ - Easy cross-compilation #/ - Platform-independent host #/ #/ Inspirations #/ #/ - tinycc https://repo.or.cz/w/tinycc.git #/ - Cuik https://github.com/RealNeGate/Cuik #/ - QBE https://c9x.me/compile/ #/ #/ To-Do list #/ #/ - ELF + x86_64 executable #/ - x86_64 object file #/ - Linking libraries #/ - Unused dependencies elimination #/ - String table for names and arrays #/ - Proper prefixes for identifiers #/ - Effective entity allocation #/ - Improve error handling #/ - Implicit procedure prototypes #/ - Implicit exit after ret from entry point #/ - Static single-assignment #/ - Sea of Nodes #/ - Optimization layers #/ - Multithreading #/ - Memory reallocation when necessary #/ - C compiler and self-compilation #/ - JIT #/ - COFF, PE, OMF, Mach-O #/ - i386, RISC-V, ARM, WebAssembly #/ - Soft floating-point arithmeric #/ - Built-in standard library #/ - Built-in batteries: #/ - File I/O #/ - Input devices #/ - Networking #/ - Graphics #/ - Audio #/ #/ Bugs #/ #/ - ... #/ #/ Done features #/ #/ - ELF header #/ - IO static dispatch #/ - Correct serialization for endianness #/ - Proper error handling #/ #/ ---------------------------------------------------------------- #/ #/ (C) 2024 Mitya Selivanov , MIT License #/ #/ ================================================================ #/ #/ Self-compilation shell script #/ SRC=${0##*./} BIN=${SRC%.*} gcc \ -Wall -Wextra -Werror -pedantic \ -Wno-old-style-declaration \ -Wno-missing-braces \ -Wno-unused-variable \ -Wno-unused-but-set-variable \ -Wno-unused-parameter \ -O0 \ -fsanitize=undefined,address,leak -mshstk \ -o $BIN $SRC && \ ./$BIN $@ && \ rm $BIN exit $? # */ #endif // ================================================================ // // Compilation options // // ================================================================ #ifndef IMPLEMENTATION #define IMPLEMENTATION 1 #endif #ifndef HELPERS #define HELPERS 1 #endif #ifndef TESTING #define TESTING 1 #endif #ifndef LOG_LEVEL #define LOG_LEVEL 5 #endif #ifndef LOG_BLOCKING #define LOG_BLOCKING 0 #endif #ifndef TRACE_BLOCKING #define TRACE_BLOCKING 1 #endif // ================================================================ // // Basic declarations // // ================================================================ #define BX_VERSION "dev" typedef signed char i8; typedef signed short i16; typedef signed int i32; typedef signed long long i64; typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef float f32; typedef double f64; typedef signed char b8; // 8-bit boolean typedef char c8; // 8-bit character // ================================================================ // // IR data declarations // // ================================================================ enum { // Log level ERROR = 1, WARNING, INFO, VERBOSE, TRACE, // Limits // STRING_TABLE_ALIGNMENT = 16, // TODO // TEMP MAX_NUM_OBJECT_FILES = 10 * 1024, MAX_NUM_SECTIONS = 2 * 1024 * 1024, MAX_NUM_SYMBOLS = 2 * 1024 * 1024, MAX_OBJECT_FILE_SIZE = 10 * 1024 * 1024, // 10 MB MAX_DEPENDENCIES_SIZE = 50 * 1024 * 1024, // 50 MB MAX_NOT_FOUND_SIZE = 10 * 1024, // 10 KB MAX_PATH_SIZE = 10 * 1024, MAX_LITERAL_SIZE = 400, MAX_NAME_SIZE = 80, MAX_NUM_PROCS = 40, MAX_NUM_NODES = 60, MAX_NUM_LINKS = 20, MAX_NUM_ARGS = 20, MAX_NUM_ENTITIES = 16 * 1024, // For indices UNDEFINED = -1, // Sea of Nodes flow type // FLOW_DATA = 0, FLOW_CONTROL, // Semantic node operations // DATA_I64 = 0, CTRL_CALL, CTRL_RET, // Calling conventions CONV_CDECL = 0, CONV_STDCALL, CONV_FASTCALL, CONV_THISCALL, // Primitive data types // TYPE_I32 = 0, // Unit types // UNIT_CODE = 0, UNIT_LIBRARY_OBJECT, UNIT_LIBRARY_STATIC, UNIT_LIBRARY_DYNAMIC, // Entity types // ENTITY_NODE = 0, ENTITY_PROC, ENTITY_UNIT, // IO dispatch operations // IO_OPEN_READ = 0, IO_OPEN_WRITE, IO_CLOSE, IO_SEEK, IO_TELL, IO_READ, IO_WRITE, IO_CHMOD_EXE, IO_SEEK_CURSOR = 0, IO_SEEK_BEGIN, IO_SEEK_END, // Formats // FORMAT_ELF = 1, FORMAT_COFF, FORMAT_PE, FORMAT_OMF, FORMAT_MATCH_O, // Architecture // ARCH_RISC_V = 64, ARCH_I386, ARCH_X86_64, ARCH_ARM32, ARCH_ARM64, }; // TODO typedef struct { i64 size; i64 offset; } String_Handle; // TODO typedef struct { i64 size; i64 capacity; u8 *data; u8 *occupied; } Strint_Table; typedef struct { i16 num; i16 type; i64 node; } Var; typedef struct { i16 num_vals; Var vals[MAX_NUM_ARGS]; } Ret; typedef struct { // NOTE // We may call a local procedure by it's id, // or a global procedure by name. i16 convention; // can be implicitly retrieved from the procedure i64 target_proc; i64 target_name_size; c8 target_name[MAX_NAME_SIZE]; // TODO use string table i64 num_args; Var args[MAX_NUM_ARGS]; } Call; // A semantic node is an operation with optional data // and possible references to other nodes. typedef struct { i16 op; i64 index_in_proc; union { u8 lit_bytes[MAX_LITERAL_SIZE]; // byte array literal // TODO use string table i64 lit_int; // integer literal Ret ret; Call call; }; } Node; // A procedure is a collection of semantic nodes // and has a string name. typedef struct { i16 convention; i64 name_size; c8 name[MAX_NAME_SIZE]; // TODO use string table i64 num_nodes; i64 nodes[MAX_NUM_NODES]; i64 ret_index; i64 unit; i64 index_in_unit; } Proc; // A compilation unit is a collection of procedures. // typedef struct { i16 type; i64 entry_point_index; i64 name_size; c8 name[MAX_NAME_SIZE]; // TODO use string table i64 num_procs; i64 procs[MAX_NUM_PROCS]; i64 num_links; i64 links[MAX_NUM_LINKS]; } Unit; // An entity can be any of: // - Node // - Proc // - Unit // // Every entity can be referenced by it's unique index // in the entity pool. typedef struct { b8 is_enabled; i16 type; union { Node node; Proc proc; Unit unit; }; } Entity; typedef struct { i64 name_size; c8 *name; i64 section; i64 address; i64 size; } Internal_Symbol_Entry; // Pool, a collection of all entities. // // NOTE // We use one single large memory block for *everything*. typedef struct { // Semantic graph entities i64 num_entities; i64 capacity; Entity *entities; // TEMP Linker buffers // TODO Use string table for buffers also. i64 max_obj_file_size; i64 max_dependencies_size; i64 max_num_obj_files; i64 max_num_sections; i64 max_num_symbols; i64 max_not_found_size; u8 * obj_file_buffer; u8 * dependencies_buffer; i64 * obj_file_offsets; i64 * section_addresses; Internal_Symbol_Entry *symbols; c8 * not_found_buffer; } Pool; // ================================================================ // // API declarations // // ================================================================ #ifdef __cplusplus extern "C" { #endif // ================================================================ // // Hooks // // NOTE // Shoud be implemented on the user side. // See: `* Helper procedures` // void bx_log(i32 log_level, u32 line, c8 *file, c8 *format, ...); void bx_assert(b8 condition, c8 *message, u32 line, c8 *file); void io_dispatch(i16 op, i64 *id, i64 *size, void *data, void *user_data); // ================================================================ // // Main API // i64 pool_add(Pool *pool, Entity data); void pool_remove(Pool *pool, i64 entity, i16 type); i64 node_init(Pool *pool, Node data); void node_destroy(Pool *pool, i64 node); i64 node_data_i64(Pool *pool, i64 value); i64 node_ctrl_call(Pool *pool, i16 convention, i64 target_proc, i64 num_args, Var *args); i64 node_ctrl_call_by_name(Pool *pool, i16 convention, i64 name_size, c8 *name, i64 num_args, Var *args); i64 node_ctrl_ret(Pool *pool, i64 num_values, Var *values); i64 proc_init(Pool *pool); void proc_destroy(Pool *pool, i64 proc); void proc_set_convention(Pool *pool, i64 proc, i16 convention); void proc_set_name(Pool *pool, i64 proc, i64 name_size, c8 *name); void proc_node_add(Pool *pool, i64 proc, i64 node); void proc_node_remove(Pool *pool, i64 proc, i64 node); i64 unit_init(Pool *pool, i16 type); void unit_destroy(Pool *pool, i64 unit); void unit_proc_add(Pool *pool, i64 unit, i64 proc); void unit_proc_remove(Pool *pool, i64 unit, i64 proc); void unit_link_add(Pool *pool, i64 unit, i64 link_unit); void unit_link_remove(Pool *pool, i64 unit, i64 link_unit); void unit_set_name(Pool *pool, i64 unit, i64 name_size, c8 *name); void unit_set_entry_point(Pool *pool, i64 unit, i64 entry_point_proc); void unit_write(Pool *pool, i64 unit, u16 target, i64 io_id, void *io_user_data); i64 io_open_read(i64 name_size, c8 *name, void *user_data); i64 io_open_write(i64 name_size, c8 *name, void *user_data); void io_close(i64 f, void *user_data); b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data); i64 io_tell(i64 f, void *user_data); i64 io_read(i64 f, i64 size, void *data, void *user_data); i64 io_write(i64 f, i64 size, void *data, void *user_data); void io_chmod_exe(i64 f, void *user_data); // ================================================================ // // Helpers API // #ifndef DISABLE_HELPERS i64 n_i64(i64 value); i64 n_call(i16 convention, i64 target_proc, i64 num_args, Var *args); i64 n_call_by_name(i16 convention, c8 *name, i64 num_args, Var *args); i64 n_ret(i64 num_vals, Var *vals); i64 p_new(c8 *name); void p_add(i64 proc, i64 node); i64 u_new(); void u_add(i64 unit, i64 proc); void u_entry_point(i64 unit, i64 proc); void u_elf_x86_64(i64 unit, c8 *output_file_name); void l_code(i64 unit, i64 link_unit); void l_object(i64 unit, c8 *object_library); void l_static(i64 unit, c8 *static_library); #endif // ================================================================ #ifdef __cplusplus } #endif // ================================================================ // // IMPLEMENTATION // // ================================================================ // // * Basic utilities // // ================================================================ #if IMPLEMENTATION #ifdef __cplusplus #error Implementation code should be compiled with a C compiler! #endif #ifndef NULL #define NULL ((void *) 0) #endif #ifdef NDEBUG # define BX_CHECK(condition, error_string, fail_result) \ do { \ b8 ok_ = (condition); \ if (!ok_) { \ bx_log(ERROR, __LINE__, __FILE__, error_string); \ return fail_result; \ } \ } while (0) #else # define BX_CHECK(condition, error_string, fail_result) \ bx_assert((condition), error_string, __LINE__, __FILE__) #endif #ifdef NDEBUG # define BX_LAX(condition, error_string) \ do { \ if (!(condition)) \ bx_log(WARNING, __LINE__, __FILE__, error_string); \ } while (0) #else # define BX_LAX(condition, error_string) \ bx_assert((condition), error_string, __LINE__, __FILE__) #endif #ifdef NDEBUG # define BX_FAIL(error_string, fail_result) \ bx_log(ERROR, __LINE__, __FILE__, error_string); \ return fail_result #else # define BX_FAIL(error_string, fail_result) \ bx_assert(0, error_string, __LINE__, __FILE__); \ return fail_result #endif #define BX_LOG(log_level, ...) \ do { \ if (log_level <= LOG_LEVEL) \ bx_log(log_level, __LINE__, __FILE__, __VA_ARGS__); \ } while (0) i64 bx_align(i64 x, i64 a) { BX_CHECK(a > 0, "Invalid arguments", 0); return x + ((a - (x % a)) % a); } #define BX_TRACE BX_LOG(TRACE, "") void bx_mem_set(void *dst, u8 val, i64 size) { BX_CHECK(dst != NULL, "Invalid arguments",); BX_CHECK(size > 0, "Invalid size",); for (i64 i = 0; i < size; ++i) ((u8 *)dst)[i] = val; } void bx_mem_cpy(void *dst, void *src, i64 size) { BX_CHECK(dst != NULL, "Invalid arguments",); BX_CHECK(src != NULL, "Invalid arguments",); BX_CHECK(size > 0, "Invalid size",); for (i64 i = 0; i < size; ++i) ((u8 *)dst)[i] = ((u8 *)src)[i]; } b8 bx_mem_eq(void *a, void *b, i64 size) { BX_CHECK(a != NULL, "Invalid arguments", 0); BX_CHECK(b != NULL, "Invalid arguments", 0); BX_CHECK(size > 0, "Invalid size", 0); u8 *x = (u8 *) a; u8 *y = (u8 *) b; for (i64 i = 0; i < size; ++i) if (x[i] != y[i]) return 0; return 1; } i64 bx_str_len(c8 *s, c8 *s_end) { BX_CHECK(s < s_end, "Buffer overflow", 0); for (i64 len = 0; s + len < s_end; ++len) if (s[len] == '\0') return len; BX_FAIL("Buffer overflow", 0); } i64 bx_str_len_or(c8 *s, c8 *s_max, i64 or_val) { for (i64 len = 0; s + len < s_max; ++len) if (s[len] == '\0') return len; return or_val; } c8 *bx_find_char(c8 *s, c8 *s_end, c8 c) { BX_CHECK(s != NULL, "Invalid arguments", NULL); BX_CHECK(s_end != NULL, "Invalid arguments", NULL); while (s < s_end && *s != c) ++s; return *s == c ? s : NULL; } c8 *bx_find_str(c8 *s, c8 *s_end, c8 *sub, c8 *sub_end) { BX_CHECK(s != NULL, "Invalid arguments", NULL); BX_CHECK(s_end != NULL, "Invalid arguments", NULL); BX_CHECK(sub != NULL, "Invalid arguments", NULL); BX_CHECK(sub_end != NULL, "Invalid arguments", NULL); while (sub_end - sub <= s_end - s && s < s_end) { c8 *q = s; c8 *p = sub; for (; q < s_end && p < sub_end; ++q, ++p) if (*q != *p) break; if (p == sub_end) return s; ++s; } return NULL; } c8 *bx_find_str_in_table(c8 *buf, c8 *buf_end, c8 *sub, c8 *sub_end) { BX_CHECK(buf != NULL, "Invalid arguments", NULL); BX_CHECK(buf_end != NULL, "Invalid arguments", NULL); BX_CHECK(sub != NULL, "Invalid arguments", NULL); BX_CHECK(sub_end != NULL, "Invalid arguments", NULL); while (buf < buf_end) { i64 len = bx_str_len(buf, buf_end); if (sub_end - sub == len && bx_mem_eq(buf, sub, len)) return buf; buf += len + 1; } return NULL; } u64 bx_u64_from_dec_str(c8 *s, c8 *s_end) { BX_CHECK(s != NULL && s_end != NULL, "Invalid arguments", 0); BX_CHECK(s < s_end, "Buffer overflow", 0); BX_CHECK(*s >= '0' && *s <= '9', "Parsing failed", 0); u64 x = 0; for (; s < s_end && *s >= '0' && *s <= '9'; ++s) x = (x * 10) + (*s - '0'); BX_LAX(s == s_end || *s == ' ' || *s == '\0', "Parsing failed"); return x; } // ================================================================ // // * Semantic graph // // ================================================================ // IR building procs // i64 pool_add(Pool *pool, Entity data) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments", UNDEFINED); BX_CHECK(pool->num_entities < pool->capacity, "Out of memory", UNDEFINED); i64 id = pool->num_entities++; data.is_enabled = 1, pool->entities[id] = data; return id; } void pool_remove(Pool *pool, i64 entity, i16 type) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(entity >= 0 && entity < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[entity].is_enabled, "Entity already removed",); BX_CHECK(pool->entities[entity].type == type, "Invalid entity type",); pool->entities[entity].is_enabled = 1; } i64 node_init(Pool *pool, Node data) { data.index_in_proc = UNDEFINED; return pool_add(pool, (Entity) { .type = ENTITY_NODE, .node = data, }); } void node_destroy(Pool *pool, i64 node) { pool_remove(pool, node, ENTITY_NODE); } i64 node_data_i64(Pool *pool, i64 value) { return node_init(pool, (Node) { .op = DATA_I64, .lit_int = value, }); } i64 node_ctrl_call(Pool *pool, i16 convention, i64 target_proc, i64 num_args, Var *args) { BX_CHECK(num_args <= MAX_NUM_ARGS, "Array too big", UNDEFINED); Call call = { .convention = convention, .target_proc = target_proc, .num_args = num_args, }; if (num_args > 0) bx_mem_cpy(call.args, args, num_args * sizeof *args); return node_init(pool, (Node) { .op = CTRL_CALL, .call = call, }); } i64 node_ctrl_call_by_name(Pool *pool, i16 convention, i64 name_size, c8 *name, i64 num_args, Var *args) { BX_CHECK(num_args <= MAX_NUM_ARGS, "Array too big", UNDEFINED); Call call = { .convention = convention, .target_name_size = name_size, .num_args = num_args, }; if (name_size > 0) bx_mem_cpy(call.target_name, name, name_size); if (num_args > 0) bx_mem_cpy(call.args, args, num_args * sizeof *args); return node_init(pool, (Node) { .op = CTRL_CALL, .call = call, }); } i64 node_ctrl_ret(Pool *pool, i64 num_values, Var *values) { BX_CHECK(num_values <= MAX_NUM_ARGS, "Array too big", UNDEFINED); Ret ret = { .num_vals = num_values, }; if (num_values > 0) bx_mem_cpy(ret.vals, values, num_values * sizeof *values); return node_init(pool, (Node) { .op = CTRL_RET, .ret = ret, }); } i64 proc_init(Pool *pool) { return pool_add(pool, (Entity) { .type = ENTITY_PROC, .proc = (Proc) { .ret_index = UNDEFINED, .index_in_unit = UNDEFINED, }, }); } void proc_destroy(Pool *pool, i64 proc) { pool_remove(pool, proc, ENTITY_PROC); } void proc_set_convention(Pool *pool, i64 proc, i16 convention) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[proc].is_enabled, "Entity does not exist",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid entity type",); pool->entities[proc].proc.convention = convention; } void proc_set_name(Pool *pool, i64 proc, i64 name_size, c8 *name) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[proc].is_enabled, "Entity does not exist",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid entity type",); BX_CHECK(name_size <= MAX_NAME_SIZE, "Name too big",); BX_CHECK(name_size >= 0, "Invalid arguments",); Proc *p = &pool->entities[proc].proc; p->name_size = name_size; if (name_size > 0) bx_mem_cpy(p->name, name, name_size); } void proc_node_add(Pool *pool, i64 proc, i64 node) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[proc].is_enabled, "Proc does not exist",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid entity type",); BX_CHECK(pool->entities[node].is_enabled, "Node does not exist",); BX_CHECK(pool->entities[node].type == ENTITY_NODE, "Invalid entity type",); Proc *p = &pool->entities[proc].proc; Node *n = &pool->entities[node].node; BX_CHECK(n->index_in_proc == UNDEFINED, "Internal",); i64 index = p->num_nodes; if (n->op == CTRL_RET) { // Only one return node is allowed. // BX_CHECK(p->ret_index == UNDEFINED, "Internal",); p->ret_index = index; } BX_CHECK(index < MAX_NUM_NODES, "Out of memory",); n->index_in_proc = index; p->nodes[index] = node; ++p->num_nodes; } void proc_node_remove(Pool *pool, i64 proc, i64 node) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(node >= 0 && node < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[proc].is_enabled, "Entity does not exist",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid entity type",); BX_CHECK(pool->entities[node].type == ENTITY_NODE, "Invalid entity type",); Proc *p = &pool->entities[proc].proc; Node *n = &pool->entities[node].node; BX_CHECK(n->index_in_proc != UNDEFINED, "Internal",); BX_CHECK(p->nodes[n->index_in_proc] == node, "Internal",); if (n->op == CTRL_RET) { BX_CHECK(p->ret_index != UNDEFINED, "Internal",); p->ret_index = UNDEFINED; } p->nodes[n->index_in_proc] = UNDEFINED; n->index_in_proc = UNDEFINED; } i64 unit_init(Pool *pool, i16 type) { return pool_add(pool, (Entity) { .type = ENTITY_UNIT, .unit = (Unit) { .type = type, .entry_point_index = UNDEFINED, } }); } void unit_destroy(Pool *pool, i64 unit) { pool_remove(pool, unit, ENTITY_UNIT); } void unit_proc_add(Pool *pool, i64 unit, i64 proc) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid entity type",); BX_CHECK(pool->entities[proc].is_enabled, "Proc does not exist",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid proc type",); Unit *u = &pool->entities[unit].unit; Proc *p = &pool->entities[proc].proc; BX_CHECK(p->index_in_unit == UNDEFINED, "Internal",); i64 index = u->num_procs; BX_CHECK(index < MAX_NUM_PROCS, "Out of memory",); p->index_in_unit = index; u->procs[index] = proc; ++u->num_procs; } void unit_proc_remove(Pool *pool, i64 unit, i64 proc) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(proc >= 0 && proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid entity type",); BX_CHECK(pool->entities[proc].type == ENTITY_PROC, "Invalid entity type",); Unit *u = &pool->entities[unit].unit; Proc *p = &pool->entities[proc].proc; BX_CHECK(p->index_in_unit != UNDEFINED, "Internal",); BX_CHECK(u->procs[p->index_in_unit] == proc, "Internal",); if (u->entry_point_index == p->index_in_unit) u->entry_point_index = UNDEFINED; u->procs[p->index_in_unit] = UNDEFINED; p->index_in_unit = UNDEFINED; } void unit_link_add(Pool *pool, i64 unit, i64 link_unit) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(link_unit >= 0 && link_unit < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid entity type",); BX_CHECK(pool->entities[link_unit].is_enabled, "Link does not exist",); BX_CHECK(pool->entities[link_unit].type == ENTITY_UNIT, "Invalid entity type",); Unit *u = &pool->entities[unit].unit; for (i64 i = 0; i < u->num_links; ++i) if (u->links[i] == link_unit) return; BX_CHECK(u->num_links < MAX_NUM_LINKS, "Internal",); u->links[u->num_links++] = link_unit; } void unit_link_remove(Pool *pool, i64 unit, i64 link_unit) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(link_unit >= 0 && link_unit < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid entity type",); BX_CHECK(pool->entities[link_unit].type == ENTITY_UNIT, "Invalid entity type",); Unit *u = &pool->entities[unit].unit; for (i64 i = 0; i < u->num_links; ++i) if (u->links[i] == link_unit) { u->links[i] = UNDEFINED; return; } BX_FAIL("Link not found",); } void unit_set_name(Pool *pool, i64 unit, i64 name_size, c8 *name) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid entity type",); BX_CHECK(name_size <= MAX_NAME_SIZE, "Name too big",); BX_CHECK(name_size >= 0, "Invalid arguments",); Unit *u = &pool->entities[unit].unit; u->name_size = name_size; if (name_size > 0) bx_mem_cpy(u->name, name, name_size); } void unit_set_entry_point(Pool *pool, i64 unit, i64 entry_point_proc) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(unit >= 0 && unit < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].type == ENTITY_UNIT, "Invalid unit type",); Unit *u = &pool->entities[unit].unit; if (entry_point_proc == UNDEFINED) { u->entry_point_index = UNDEFINED; return; } BX_CHECK(entry_point_proc >= 0 && entry_point_proc < pool->num_entities, "Buffer overflow",); BX_CHECK(pool->entities[entry_point_proc].is_enabled, "Internal",); BX_CHECK(pool->entities[entry_point_proc].type == ENTITY_PROC, "Internal",); Proc *p = &pool->entities[entry_point_proc].proc; BX_CHECK(p->index_in_unit != UNDEFINED, "Internal",); BX_CHECK(u->procs[p->index_in_unit] == entry_point_proc, "Internal",); pool->entities[unit].unit.entry_point_index = p->index_in_unit; } // ================================================================ // // * Serialization // // ---------------------------------------------------------------- // // Terms // // LE = little endian // BE = big endian // HO = host ordering // // byte = 8 bits // word = 2 bytes // dword = 4 bytes // qword = 8 bytes // // ================================================================ enum { BIT_LE = 0, BIT_BE = 1, BIT_ORDER_MASK = 1, BYTE_LE = 0, BYTE_BE = 2, BYTE_ORDER_MASK = 2, WORD_LE = 0, WORD_BE = 4, WORD_ORDER_MASK = 4, DWORD_LE = 0, DWORD_BE = 8, DWORD_ORDER_MASK = 8, F64_DWORD_LE = 0, F64_DWORD_BE = 16, F64_DWORD_ORDER_MASK = 16, LE = BIT_LE | BYTE_LE | WORD_LE | DWORD_LE | F64_DWORD_LE, BE = BIT_BE | BYTE_BE | WORD_BE | DWORD_BE | F64_DWORD_BE, }; typedef struct { unsigned first:1; } Bits; u32 host_bit_order() { if ((*(Bits *) &(u8) { 1 }).first == 1) return BIT_LE; return BIT_BE; } u32 host_byte_order() { if (((u8 *) &(u32) { 1 })[0] == 1) return BYTE_LE; return BYTE_BE; } u32 host_word_order() { if (((u16 *) &(u32) { 0x100 })[0] == 0x100) return WORD_LE; return WORD_BE; } u32 host_dword_order() { if (((u32 *) &(u64) { 0x10000 })[0] == 0x10000) return DWORD_LE; return DWORD_BE; } void check_f32_format() { // FIXME if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull) return; if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffff00000000ull) == 0x40301fcb00000000ull) return; BX_FAIL("Unknown host floating-point number format",); } u32 host_f64_dword_order() { if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffffull) == 0x40301fcbull) return host_dword_order() == DWORD_LE ? F64_DWORD_LE : F64_DWORD_BE; if ((*(u64 *) &(f64) { -1.4575323640233e-306 } & 0xffffffff00000000ull) == 0x40301fcb00000000ull) return host_dword_order() == DWORD_LE ? F64_DWORD_BE : F64_DWORD_LE; BX_FAIL("Unknown host floating-point number format", 0); } u32 host_data_ordering() { return host_bit_order() | host_byte_order() | host_word_order() | host_dword_order() | host_f64_dword_order(); } u8 read_u8(u32 ordering, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments", 0); BX_CHECK(v < v_end, "Buffer overflow", 0); if ((ordering & BIT_ORDER_MASK) == host_bit_order()) return *v; return ((*v >> 7) & 1) | (((*v >> 6) & 1) << 1) | (((*v >> 5) & 1) << 2) | (((*v >> 4) & 1) << 3) | (((*v >> 3) & 1) << 4) | (((*v >> 2) & 1) << 5) | (((*v >> 1) & 1) << 6) | (((*v) & 1) << 7); } u16 read_u16(u32 ordering, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments", 0); BX_CHECK(v + 2 <= v_end, "Buffer overflow", 0); u16 x; if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order()) bx_mem_cpy(&x, v, 2); else if ((ordering & BYTE_ORDER_MASK) == host_byte_order()) x = ((u16) read_u8(ordering, v, v_end)) | (((u16) read_u8(ordering, v + 1, v_end)) << 8); else x = ((u16) read_u8(ordering, v + 1, v_end)) | (((u16) read_u8(ordering, v, v_end)) << 8); return x; } u32 read_u32(u32 ordering, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments", 0); BX_CHECK(v + 4 <= v_end, "Buffer overflow", 0); u32 x; if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order() && (ordering & WORD_ORDER_MASK) == host_word_order()) bx_mem_cpy(&x, v, 4); else if ((ordering & WORD_ORDER_MASK) == host_word_order()) x = ((u32) read_u16(ordering, v, v_end)) | (((u32) read_u16(ordering, v + 2, v_end)) << 16); else x = ((u32) read_u16(ordering, v + 2, v_end)) | (((u32) read_u16(ordering, v, v_end)) << 16); return x; } u64 read_u64(u32 ordering, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments", 0); BX_CHECK(v + 8 <= v_end, "Buffer overflow", 0); u64 x; if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order() && (ordering & WORD_ORDER_MASK) == host_word_order() && (ordering & DWORD_ORDER_MASK) == host_dword_order()) bx_mem_cpy(&x, v, 8); else if ((ordering & DWORD_ORDER_MASK) == host_dword_order()) x = ((u64) read_u32(ordering, v, v_end)) | (((u64) read_u32(ordering, v + 4, v_end)) << 32); else x = ((u64) read_u32(ordering, v + 4, v_end)) | (((u64) read_u32(ordering, v, v_end)) << 32); return x; } void write_u8(u8 ordering, u8 x, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments",); BX_CHECK(v < v_end, "Buffer overflow",); if ((ordering & BIT_ORDER_MASK) == host_bit_order()) *v = x; else *v = ((x >> 7) & 1) | (((x >> 6) & 1) << 1) | (((x >> 5) & 1) << 2) | (((x >> 4) & 1) << 3) | (((x >> 3) & 1) << 4) | (((x >> 2) & 1) << 5) | (((x >> 1) & 1) << 6) | (((x) & 1) << 7); } void write_u16(u32 ordering, u16 x, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments",); BX_CHECK(v + 2 <= v_end, "Buffer overflow",); if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order()) bx_mem_cpy(v, &x, 2); else if ((ordering & BYTE_ORDER_MASK) == host_byte_order()) { write_u8(ordering, (u8) ( x & 0xff), v, v_end); write_u8(ordering, (u8) ((x >> 8) & 0xff), v + 1, v_end); } else { write_u8(ordering, (u8) ( x & 0xff), v + 1, v_end); write_u8(ordering, (u8) ((x >> 8) & 0xff), v, v_end); } } void write_u32(u32 ordering, u32 x, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments",); BX_CHECK(v + 4 <= v_end, "Buffer overflow",); if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order() && (ordering & WORD_ORDER_MASK) == host_word_order()) bx_mem_cpy(v, &x, 4); else if ((ordering & WORD_ORDER_MASK) == host_word_order()) { write_u16(ordering, (u16) ( x & 0xffff), v, v_end); write_u16(ordering, (u16) ((x >> 16) & 0xffff), v + 2, v_end); } else { write_u16(ordering, (u16) ( x & 0xffff), v + 2, v_end); write_u16(ordering, (u16) ((x >> 16) & 0xffff), v, v_end); } } void write_u64(u32 ordering, u64 x, u8 *v, u8 *v_end) { BX_CHECK(v != NULL, "Invalid arguments",); BX_CHECK(v + 8 <= v_end, "Buffer overflow",); if ((ordering & BIT_ORDER_MASK) == host_bit_order() && (ordering & BYTE_ORDER_MASK) == host_byte_order() && (ordering & WORD_ORDER_MASK) == host_word_order() && (ordering & DWORD_ORDER_MASK) == host_dword_order()) bx_mem_cpy(v, &x, 8); else if ((ordering & DWORD_ORDER_MASK) == host_dword_order()) { write_u32(ordering, (u32) ( x & 0xffffffffull), v, v_end); write_u32(ordering, (u32) ((x >> 32) & 0xffffffffull), v + 4, v_end); } else { write_u32(ordering, (u32) ( x & 0xffffffffull), v + 4, v_end); write_u32(ordering, (u32) ((x >> 16) & 0xffffffffull), v, v_end); } } i16 read_i8(u32 ordering, void *v, void *v_end) { return (i8) read_u8(ordering, v, v_end); } i16 read_i16(u32 ordering, void *v, void *v_end) { return (i16) read_u16(ordering, v, v_end); } i32 read_i32(u32 ordering, void *v, void *v_end) { return (i32) read_u32(ordering, v, v_end); } i64 read_i64(u32 ordering, void *v, void *v_end) { return (i64) read_u64(ordering, v, v_end); } f32 read_f32(u32 ordering, void *v, void *v_end) { check_f32_format(); return *(f32 *) &(u32) { read_u32(ordering, v, v_end) }; } f64 read_f64(u32 ordering, void *v, void *v_end) { u64 x = read_u64(ordering, v, v_end); if ((ordering & F64_DWORD_ORDER_MASK) != host_f64_dword_order()) x = ((x & 0xffffffffull) << 32) | ((x >> 32) & 0xffffffffull); void *p = &x; return *(f64 *) p; } void write_i8(u32 ordering, i8 x, void *v, void *v_end) { write_u8(ordering, (u8) x, v, v_end); } void write_i16(u32 ordering, i16 x, void *v, void *v_end) { write_u16(ordering, (u16) x, v, v_end); } void write_i32(u32 ordering, i32 x, void *v, void *v_end) { write_u32(ordering, (u32) x, v, v_end); } void write_i64(u32 ordering, i64 x, void *v, void *v_end) { write_u64(ordering, (u64) x, v, v_end); } void write_f32(u32 ordering, f32 x, void *v, void *v_end) { check_f32_format(); void *p = &x; write_u32(ordering, *(u32 *) p, v, v_end); } void write_f64(u32 ordering, f64 x, void *v, void *v_end) { void *p = &x; if ((ordering & F64_DWORD_ORDER_MASK) == host_f64_dword_order()) write_u64(ordering, *(u64 *) p, v, v_end); else { write_u32(ordering, *(((u32 *) p) + 1), (u8 *) v, v_end); write_u32(ordering, * (u32 *) p, ((u8 *) v) + 4, v_end); } } // Shortcuts #define HO host_data_ordering() // ================================================================ // // * Code generation and linking // // ---------------------------------------------------------------- // // Docs and helpful materials // // AR https://man.freebsd.org/cgi/man.cgi?query=ar&sektion=5 // ELF https://man7.org/linux/man-pages/man5/elf.5.html // // Relocation types // https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/ // https://docs.oracle.com/cd/E19120-01/open.solaris/819-0690/chapter7-2/index.html // // https://web.archive.org/web/20150324024617/http://mylinuxbook.com/readelf-command/ // // tinycc impl // https://repo.or.cz/tinycc.git/blob/HEAD:/x86_64-link.c // https://repo.or.cz/tinycc.git/blob/HEAD:/tccelf.c // // Online assembler // https://defuse.ca/online-x86-assembler.htm // https://shell-storm.org/online/Online-Assembler-and-Disassembler/ // // Linux syscall // https://man7.org/linux/man-pages/man2/intro.2.html // https://man7.org/linux/man-pages/man2/syscalls.2.html // https://man7.org/linux/man-pages/man2/syscall.2.html // // ---------------------------------------------------------------- // // TODO Experiment with mapping several p_vaddr into one p_paddr. // // ================================================================ enum { HOST_Unknown = 0, HOST_Unix, HOST_Linux, HOST_Windows, HOST_macOS, HOST_Cygwin, #if defined(__CYGWIN__) HOST = HOST_Cygwin, #elif defined(_WIN32) HOST = HOST_Windows, #elif defined(__linux__) HOST = HOST_Linux, #elif defined(__APPLE__) HOST = HOST_macOS, #elif defined(__unix__) HOST = HOST_Unix, #else HOST = HOST_Unknown, #endif // x86_64 constants // X86_64_BASE_ADDRESS = 0x400000, X86_64_ALIGNMENT = 8, // ELF format constants // ELF_64 = 2, ELF_2_LE = 1, ELF_VERSION = 1, ELF_SYS_V = 0, ELF_LINUX = 3, ELF_ABI_VERSION = 0, ELF_RELOCATABLE = 1, ELF_EXECUTABLE = 2, ELF_X86_64 = 62, ELF_HEADER_SIZE = 64, ELF_PROGRAM_HEADER_SIZE = 56, ELF_SECTION_HEADER_SIZE = 64, ELF_SYMBOL_ENTRY_SIZE = 24, ELF_REL_ENTRY_SIZE = 16, ELF_RELA_ENTRY_SIZE = 24, SEC_NONE = 0, SEC_PROGBITS, SEC_SYMTAB, SEC_STRTAB, SEC_RELA, SEC_HASH, SEC_DYNAMIC, SEC_NOTE, SEC_NOBITS, SEC_REL, SEC_SHLIB, SEC_DYNSYM, SEC_INIT_ARRAY = 14, SEC_FINI_ARRAY, SEC_PREINIT_ARRAY, SEC_GROUP, SEC_SYMTAB_SHNDX, SYM_NONE = 0, SYM_PROC, SYM_DATA, SYM_COMMON, SYM_TLS, SYM_SECTION, SYM_SPECIFIC, BIND_LOCAL = 0, BIND_GLOBAL, BIND_WEAK, // Relocation types // R_X86_64_NONE = 0, R_X86_64_64, R_X86_64_PC32, R_X86_64_GOT32, R_X86_64_PLT32, R_X86_64_COPY, R_X86_64_GLOB_DAT, R_X86_64_JUMP_SLOT, R_X86_64_RELATIVE, R_X86_64_GOTPCREL, R_X86_64_32, R_X86_64_32S, R_X86_64_16, R_X86_64_PC16, R_X86_64_8, R_X86_64_PC8, R_X86_64_DTPMOD64, R_X86_64_DTPOFF64, R_X86_64_TPOFF64, R_X86_64_TLSGD, R_X86_64_TLSLD, R_X86_64_DTPOFF32, R_X86_64_GOTTPOFF, R_X86_64_TPOFF32, R_X86_64_PC64, R_X86_64_GOTOFF64, R_X86_64_GOTPC32, R_X86_64_GOT64, R_X86_64_GOTPCREL64, R_X86_64_GOTPC64, R_X86_64_GOTPLT64, R_X86_64_PLTOFF64, R_X86_64_SIZE32, R_X86_64_SIZE64, R_X86_64_GOTPC32_TLSDESC, R_X86_64_TLSDESC_CALL, R_X86_64_TLSDESC, R_X86_64_IRELATIVE, R_X86_64_RELATIVE64, R_X86_64_GOTPCRELX = 41, R_X86_64_REX_GOTPCRELX, }; c8 *SEC_TYPE_NAMES[] = { [SEC_NONE] = "none", [SEC_PROGBITS] = "progbits", [SEC_SYMTAB] = "symtab", [SEC_STRTAB] = "strtab", [SEC_RELA] = "rela", [SEC_HASH] = "hash", [SEC_DYNAMIC] = "dynamic", [SEC_NOTE] = "note", [SEC_NOBITS] = "nobits", [SEC_REL] = "rel", [SEC_SHLIB] = "shlib", [SEC_DYNSYM] = "dynsym", [12] = "", [13] = "", [SEC_INIT_ARRAY] = "init array", [SEC_FINI_ARRAY] = "fini array", [SEC_PREINIT_ARRAY] = "preinit array", [SEC_GROUP] = "group", [SEC_SYMTAB_SHNDX] = "symtab shndx", }; c8 *SYM_TYPE_NAMES[] = { [SYM_NONE] = "none", [SYM_PROC] = "proc", [SYM_DATA] = "data", [SYM_COMMON] = "common", [SYM_TLS] = "tls", [SYM_SECTION] = "section", [SYM_SPECIFIC] = "spec", }; c8 *BIND_TYPE_NAMES[] = { [BIND_LOCAL] = "local", [BIND_GLOBAL] = "global", [BIND_WEAK] = "weak", }; c8 *REL_NAMES[] = { [R_X86_64_NONE] = "none", [R_X86_64_64] = "64", [R_X86_64_PC32] = "pc32", [R_X86_64_GOT32] = "got32", [R_X86_64_PLT32] = "plt32", [R_X86_64_COPY] = "copy", [R_X86_64_GLOB_DAT] = "glob dat", [R_X86_64_JUMP_SLOT] = "jump slot", [R_X86_64_RELATIVE] = "relative", [R_X86_64_GOTPCREL] = "gotpcrel", [R_X86_64_32] = "32", [R_X86_64_32S] = "32s", [R_X86_64_16] = "16", [R_X86_64_PC16] = "pc16", [R_X86_64_8] = "8", [R_X86_64_PC8] = "pc8", [R_X86_64_DTPMOD64] = "dtpmod64", [R_X86_64_DTPOFF64] = "dtpoff64", [R_X86_64_TPOFF64] = "tpoff64", [R_X86_64_TLSGD] = "tlsgd", [R_X86_64_TLSLD] = "tlsld", [R_X86_64_DTPOFF32] = "dtpoff32", [R_X86_64_GOTTPOFF] = "gottpoff", [R_X86_64_TPOFF32] = "tpoff32", [R_X86_64_PC64] = "pc64", [R_X86_64_GOTOFF64] = "gotoff64", [R_X86_64_GOTPC32] = "gotpc32", [R_X86_64_GOT64] = "got64", [R_X86_64_GOTPCREL64] = "gotpcrel64", [R_X86_64_GOTPC64] = "gotpc64", [R_X86_64_GOTPLT64] = "gotplt64", [R_X86_64_PLTOFF64] = "pltoff64", [R_X86_64_SIZE32] = "size32", [R_X86_64_SIZE64] = "size64", [R_X86_64_GOTPC32_TLSDESC] = "gotpc32 tlsdesc", [R_X86_64_TLSDESC_CALL] = "tlsdesc call", [R_X86_64_TLSDESC] = "tlsdesc", [R_X86_64_IRELATIVE] = "irelative", [R_X86_64_RELATIVE64] = "relative64", [R_X86_64_GOTPCRELX] = "gotpcrelx", [R_X86_64_REX_GOTPCRELX] = "gotpcrelx", }; c8 ELF_MAGIC[4] = "\x7f" "ELF"; c8 AR_MAGIC[8] = "!\n"; c8 AR_SYMBOL_TABLE[] = "/ "; c8 AR_STRING_TABLE[] = "// "; c8 SECTION_TEXT[] = ".text"; c8 SECTION_RELA_TEXT[] = ".rela.text"; c8 SECTION_DATA[] = ".data"; c8 SECTION_BSS[] = ".bss"; c8 SECTION_RODATA[] = ".rodata"; c8 SECTION_SYMTAB[] = ".symtab"; c8 SECTION_STRTAB[] = ".strtab"; c8 SECTION_SHSTRTAB[] = ".shstrtab"; typedef struct { i64 offset; i64 size; } Offset_Size; typedef struct { u8 * begin; u8 * end; Offset_Size elf; } Buffer_Context; typedef struct { i64 offset; i64 num; } Offset_Num; typedef struct { Offset_Size name; u32 type; b8 alloc; b8 write; b8 exec; i64 alignment; i64 entry_size; i64 num_entries; Offset_Size data; } Section_Header; typedef struct { Offset_Size name; u8 type; u8 bind; i64 section; Offset_Size value; } Symbol_Entry; typedef struct { Symbol_Entry symbol; i64 offset; u32 type; i64 addent; } Relx_Entry; // ================================================================ i64 ar_find_symbol_offset_by_name( u8 *ar_symbol_table, u8 *ar_end, c8 *name, c8 *name_end ) { BX_CHECK(ar_symbol_table != NULL, "Invalid arguments", -1); BX_CHECK(name != NULL, "Invalid arguments", -1); BX_CHECK(name_end > name, "Invalid arguments", -1); i64 num = (i64) read_u32((LE & ~BYTE_ORDER_MASK) | BYTE_BE, ar_symbol_table, ar_end); i64 len = name_end - name; c8 *s = (c8 *) (ar_symbol_table + 4 * (num + 1)); i64 index = 0; for (; index < num; ++index) { BX_CHECK(s + len <= (c8 *) ar_end, "Buffer overflow", -1); if (s[len] == '\0' && bx_mem_eq(s, name, len)) return (i64) read_u32((LE & ~BYTE_ORDER_MASK) | BYTE_BE, ar_symbol_table + 4 * (index + 1), ar_end); while (*s != '\0' && s < (c8 *) ar_end) ++s; BX_CHECK(s < (c8 *) ar_end, "Buffer overflow", -1); BX_CHECK(*s == '\0', "Buffer overflow", -1); ++s; } BX_FAIL("Symbol not found", 0); } Buffer_Context elf_buffer_context( Pool *pool, i64 num_obj_files, i64 elf_index ) { return (Buffer_Context) { .begin = pool->dependencies_buffer, .end = pool->dependencies_buffer + pool->obj_file_offsets[num_obj_files], .elf = { .offset = pool->obj_file_offsets[elf_index], .size = pool->obj_file_offsets[elf_index + 1] - pool->obj_file_offsets[elf_index], }, }; } Offset_Num elf_section_headers( Buffer_Context b ) { u8 *begin = b.begin + b.elf.offset; u8 *end = begin + b.elf.size; BX_CHECK(end <= b.end, "Buffer overflow", (Offset_Num) {0}); return (Offset_Num) { .offset = b.elf.offset + read_i64(LE, begin + 40, end), .num = (i64) read_u16(LE, begin + 60, end), }; } i64 elf_section_header_offset( Buffer_Context b, i64 index ) { return elf_section_headers(b).offset + ELF_SECTION_HEADER_SIZE * index; } Offset_Size elf_section_names_data( Buffer_Context b ) { u8 *elf_begin = b.begin + b.elf.offset; u8 *elf_end = elf_begin + b.elf.size; BX_CHECK(elf_end <= b.end, "Buffer overflow", (Offset_Size) {0}); i64 string_table_index = (i64) read_u16(LE, elf_begin + 62, elf_end); u8 *begin = b.begin + elf_section_header_offset(b, string_table_index); return (Offset_Size) { .offset = b.elf.offset + read_i64(LE, begin + 24, elf_end), .size = read_i64(LE, begin + 32, elf_end), }; } Offset_Size elf_name_in_string_table( Buffer_Context b, Offset_Size string_table, i64 name_offset ) { if (name_offset == 0) return (Offset_Size) { .offset = 0, .size = 0, }; c8 *begin = (c8 *) b.begin + string_table.offset + name_offset; c8 *end = (c8 *) b.begin + string_table.offset + string_table.size; return (Offset_Size) { .offset = string_table.offset + name_offset, .size = bx_str_len(begin, end), }; } i64 elf_find_section_index_by_name( Buffer_Context b, c8 * name, i64 name_size ) { BX_CHECK(name != NULL, "Invalid arguments", 0); if (name_size == 0) return 0; Offset_Num headers = elf_section_headers(b); Offset_Size names = elf_section_names_data(b); for (i64 i = 0; i < headers.num; ++i) { u8 *begin = b.begin + headers.offset + i * ELF_SECTION_HEADER_SIZE; i64 name_index = (i64) read_u32(LE, begin, b.end); Offset_Size s = elf_name_in_string_table(b, names, name_index); if (s.size == name_size && bx_mem_eq(b.begin + s.offset, name, name_size)) return i; } return 0; } Section_Header elf_section( Buffer_Context b, i64 index ) { Offset_Size names = elf_section_names_data(b); u8 * begin = b.begin + elf_section_header_offset(b, index); u8 * end = b.begin + b.elf.offset + b.elf.size; BX_CHECK(end <= b.end, "Buffer overflow", (Section_Header) {0}); i64 name_index = (i64) read_u32(LE, begin, end); i64 size = read_i64(LE, begin + 32, end); i64 entry_size = read_i64(LE, begin + 56, end); i64 num_entries = entry_size > 0 ? (size / entry_size) : 0; u32 type = read_u32(LE, begin + 4, end); u64 flags = read_u64(LE, begin + 8, end); if (type > SEC_SYMTAB_SHNDX || type == 12 || type == 13) { BX_LOG(WARNING, "Unknown section type: %d", type); type = SEC_NONE; } return (Section_Header) { .name = elf_name_in_string_table(b, names, name_index), .type = type, .alloc = (flags & 2) == 2, .write = (flags & 1) == 1, .exec = (flags & 4) == 4, .alignment = read_i64(LE, begin + 48, end), .entry_size = entry_size, .num_entries = num_entries, .data = { .offset = b.elf.offset + read_i64(LE, begin + 24, end), .size = size, }, }; } Section_Header elf_find_section_by_name( Buffer_Context b, c8 * name, i64 name_size ) { i64 index = elf_find_section_index_by_name(b, name, name_size); return index == 0 ? (Section_Header) {0} : elf_section(b, index); } c8 *elf_name_from_offset( Buffer_Context b, Offset_Size name ) { if (name.size == 0) return ""; c8 *begin = (c8 *) (b.begin + name.offset); i64 len = bx_str_len(begin, (c8 *) b.end); BX_CHECK((i64) name.size == len, "Buffer overflow", ""); return begin; } i64 elf_find_related_section_index( Buffer_Context b, i64 section_index ) { Offset_Size src_name = elf_section(b, section_index).name; Section_Header dst = elf_section(b, section_index - 1); if (src_name.size > dst.name.size && bx_mem_eq( elf_name_from_offset(b, src_name) + (src_name.size - dst.name.size), elf_name_from_offset(b, dst.name), dst.name.size)) return section_index - 1; i64 num_sections = elf_section_headers(b).num; for (i64 i = 0; i < num_sections; ++i) { if (i == section_index || i + 1 == section_index) continue; dst = elf_section(b, i); if (src_name.size > dst.name.size && bx_mem_eq( elf_name_from_offset(b, src_name) + (src_name.size - dst.name.size), elf_name_from_offset(b, dst.name), dst.name.size)) { BX_LOG(WARNING, "Unexpected section order"); return i; } } BX_FAIL("Not found", 0); } Offset_Size elf_find_related_data( Buffer_Context b, i64 section_index ) { return elf_section(b, elf_find_related_section_index(b, section_index)).data; } Symbol_Entry elf_symbol( Buffer_Context b, Offset_Size symbol_table, Offset_Size string_table, i64 symbol_index ) { u8 *begin = b.begin + symbol_table.offset + symbol_index * ELF_SYMBOL_ENTRY_SIZE; u8 *end = b.begin + symbol_table.offset + symbol_table.size; BX_CHECK(end <= b.end, "Buffer overflow", (Symbol_Entry) {0}); BX_CHECK(end <= b.begin + b.elf.offset + b.elf.size, "Buffer overflow", (Symbol_Entry) {0}); i64 sym_name = (i64) read_u32(LE, begin, end); u8 sym_info = read_u8 (LE, begin + 4, end); i64 sym_shndx = (i64) read_u16(LE, begin + 6, end); i64 sym_value = read_i64(LE, begin + 8, end); i64 sym_size = read_i64(LE, begin + 16, end); u8 type = (sym_info & 0xf) == 0 ? SYM_NONE : (sym_info & 0xf) == 1 ? SYM_DATA : (sym_info & 0xf) == 2 ? SYM_PROC : (sym_info & 0xf) == 3 ? SYM_SECTION : (sym_info & 0xf) == 5 ? SYM_COMMON : (sym_info & 0xf) == 6 ? SYM_TLS : SYM_SPECIFIC; u8 bind = (sym_info >> 4) == 1 ? BIND_GLOBAL : (sym_info >> 4) == 2 ? BIND_WEAK : BIND_LOCAL; return (Symbol_Entry) { .name = elf_name_in_string_table(b, string_table, sym_name), .type = type, .bind = bind, .section = sym_shndx, .value = { .offset = sym_value, .size = sym_size, }, }; } Relx_Entry elf_rel( Buffer_Context b, Offset_Size symbol_table, Offset_Size string_table, Offset_Size relocations, i64 rel_index ) { u8 *begin = b.begin + relocations.offset + rel_index * ELF_REL_ENTRY_SIZE; u8 *end = begin + ELF_REL_ENTRY_SIZE; BX_CHECK(end <= b.end, "Buffer overflow", (Rel_Entry) {0}); BX_CHECK(end <= b.begin + b.elf.offset + b.elf.size, "Buffer overflow", (Rel_Entry) {0}); BX_CHECK(end <= b.begin + relocations.offset + relocations.size, "Buffer overflow", (Rel_Entry) {0}); i64 rel_offset = read_i64(LE, begin, end); u32 rel_type = read_u32(LE, begin + 8, end); i64 rel_sym = (i64) read_u32(LE, begin + 12, end); return (Relx_Entry) { .symbol = elf_symbol(b, symbol_table, string_table, rel_sym), .offset = rel_offset, .type = rel_type, .addent = 0, }; } Relx_Entry elf_rela( Buffer_Context b, Offset_Size symbol_table, Offset_Size string_table, Offset_Size relocations, i64 rela_index ) { u8 *begin = b.begin + relocations.offset + rela_index * ELF_RELA_ENTRY_SIZE; u8 *end = begin + ELF_RELA_ENTRY_SIZE; BX_CHECK(end <= b.end, "Buffer overflow", (Rela_Entry) {0}); BX_CHECK(end <= b.begin + b.elf.offset + b.elf.size, "Buffer overflow", (Rela_Entry) {0}); BX_CHECK(end <= b.begin + relocations.offset + relocations.size, "Buffer overflow", (Rela_Entry) {0}); i64 rela_offset = read_i64(LE, begin, end); u32 rela_type = read_u32(LE, begin + 8, end); i64 rela_sym = (i64) read_u32(LE, begin + 12, end); i64 rela_addent = read_i64(LE, begin + 16, end); return (Relx_Entry) { .symbol = elf_symbol(b, symbol_table, string_table, rela_sym), .offset = rela_offset, .type = rela_type, .addent = rela_addent, }; } Symbol_Entry elf_find_symbol_by_name( Buffer_Context b, i64 symbol_table_index, Offset_Size string_table, c8 * name, i64 name_size ) { Section_Header symbol_table = elf_section(b, symbol_table_index); for (i64 i = 0; i < symbol_table.num_entries; ++i) { Symbol_Entry sym = elf_symbol(b, symbol_table.data, string_table, i); BX_CHECK(b.begin + sym.name.offset + name_size <= b.end, "Buffer overflow", (Symbol_Entry) {0}); BX_CHECK(sym.name.offset + name_size <= b.elf.size, "Buffer overflow", (Symbol_Entry) {0}); if (name_size == sym.name.size && bx_mem_eq(name, b.begin + sym.name.offset, name_size)) return sym; } BX_FAIL("Not found", (Symbol_Entry) {0}); } void elf_checks(Buffer_Context b) { u8 *begin = b.begin + b.elf.offset; u8 *end = begin + b.elf.size; u8 osabi = read_u8(LE, begin + 7, end); BX_CHECK( read_u8 (LE, begin, end) == ELF_MAGIC[0], "Invalid ELF file",); BX_CHECK( read_u8 (LE, begin + 1, end) == ELF_MAGIC[1], "Invalid ELF file",); BX_CHECK( read_u8 (LE, begin + 2, end) == ELF_MAGIC[2], "Invalid ELF file",); BX_CHECK( read_u8 (LE, begin + 3, end) == ELF_MAGIC[3], "Invalid ELF file",); BX_CHECK( read_u8 (LE, begin + 4, end) == ELF_64, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 5, end) == ELF_2_LE, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 6, end) == ELF_VERSION, "Unsupported ELF file",); BX_CHECK( osabi == ELF_SYS_V || osabi == ELF_LINUX, "Unsupported ELF file",); BX_CHECK( read_u8 (LE, begin + 8, end) == ELF_ABI_VERSION, "Unsupported ELF file",); BX_CHECK( read_u16(LE, begin + 16, end) == ELF_RELOCATABLE, "Unsupported ELF file",); BX_CHECK( read_u16(LE, begin + 18, end) == ELF_X86_64, "Unsupported ELF file",); BX_CHECK( read_u32(LE, begin + 20, end) == ELF_VERSION, "Unsupported ELF file",); BX_LAX( read_u64(LE, begin + 24, end) == 0, "Invalid entry point"); BX_LAX( read_u64(LE, begin + 32, end) == 0, "Invalid program header offset"); BX_LAX( read_u32(LE, begin + 48, end) == 0, "Invalid flags"); BX_LAX( read_u16(LE, begin + 52, end) == ELF_HEADER_SIZE, "Invalid ELF header size"); BX_LAX( read_u16(LE, begin + 54, end) == 0, "Invalid program header size"); BX_LAX( read_u16(LE, begin + 56, end) == 0, "Invalid num program headers"); BX_LAX( read_u16(LE, begin + 58, end) == ELF_SECTION_HEADER_SIZE, "Invalid section header size"); } void elf_dump(u32 log_level, Buffer_Context b) { Offset_Num headers = elf_section_headers(b); Offset_Size strtab = elf_find_section_by_name(b, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; Offset_Size symtab = elf_find_section_by_name(b, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; for (i64 sec_index = 1; sec_index < headers.num; ++sec_index) { Section_Header section = elf_section(b, sec_index); c8 *name = elf_name_from_offset(b, section.name); BX_LOG( log_level, "\"%s%s\x1b[37m\"%*s%-14s%s%s%s%s%lld%s", section.type == SEC_SYMTAB || section.type == SEC_RELA || section.type == SEC_REL ? "\x1b[32m" : section.alloc ? "\x1b[34m" : section.type == SEC_STRTAB ? "\x1b[33m" : "\x1b[31m", name, (i32) (section.name.size < 30 ? 30 - section.name.size : 1), "", SEC_TYPE_NAMES[section.type], section.alloc ? "R" : "_", section.write ? "W" : "_", section.exec ? "X" : "_", section.data.size > 0 ? " - " : "", section.data.size, section.data.size > 0 ? " bytes" : "\b " ); switch (section.type) { case SEC_SYMTAB: BX_LOG(log_level, " - -"); for (i64 sym_index = 1; sym_index < section.num_entries; ++sym_index) { Symbol_Entry sym = elf_symbol(b, section.data, strtab, (u16) sym_index); c8 *name = elf_name_from_offset(b, sym.name); i32 len = (sym.name.size == 0) ? 4 : (i32) sym.name.size; BX_LOG( log_level, " %s%s%s\x1b[37m%s %.*s %s%-7s %s\x1b[37m", *name != '\0' ? "\"" : "", sym.bind == BIND_GLOBAL ? "\x1b[32m" : sym.bind == BIND_WEAK ? "\x1b[35m" : "\x1b[31m", *name != '\0' ? name : "", *name != '\0' ? "\"" : "", 31 < len ? 1 : 32 - len, 31 < len ? " " : "........................................", sym.type == SYM_PROC ? "\x1b[32m" : sym.type == SYM_DATA ? "\x1b[32m" : sym.type == SYM_COMMON ? "\x1b[33m" : sym.type == SYM_TLS ? "\x1b[35m" : sym.type == SYM_SECTION ? "\x1b[31m" : sym.type == SYM_SPECIFIC ? "\x1b[31m" : "", SYM_TYPE_NAMES[sym.type], sym.section == 0 ? ( sym.bind == BIND_GLOBAL || sym.bind == BIND_WEAK ? "\x1b[33mundefined" : "\x1b[31mundefined") : "" ); } BX_LOG(log_level, " - -"); break; case SEC_RELA: { BX_LOG(log_level, " - -"); for (i64 rela_index = 0; rela_index < section.num_entries; ++rela_index) { Relx_Entry relx = elf_rela(b, symtab, strtab, section.data, rela_index); BX_LOG( log_level, " %-16s %08llx %-+5lld <= %s%08llx\x1b[37m%s\x1b[37m \"%s\"", REL_NAMES[relx.type], relx.offset, relx.addent, relx.symbol.bind == BIND_WEAK ? "\x1b[33m" : "\x1b[32m", relx.symbol.value.offset + elf_section(b, relx.symbol.section).data.offset, relx.symbol.type == SYM_DATA ? " \x1b[34mdata" : relx.symbol.type == SYM_COMMON ? " \x1b[32mdata" : relx.symbol.type == SYM_TLS ? " \x1b[34mdata" : relx.symbol.type == SYM_PROC ? " \x1b[34mproc" : relx.symbol.type == SYM_SECTION ? " \x1b[36msect" : relx.symbol.type == SYM_SPECIFIC ? " \x1b[34mspec" : " \x1b[33mnone", elf_name_from_offset(b, relx.symbol.name) ); } BX_LOG(log_level, " - -"); } break; case SEC_REL: { BX_LOG(log_level, " - -"); for (i64 rel_index = 0; rel_index < section.num_entries; ++rel_index) { Relx_Entry relx = elf_rel(b, symtab, strtab, section.data, rel_index); BX_LOG( log_level, " %-16s %08llx <= %s%08llx\x1b[37m%s\x1b[37m \"%s\"", REL_NAMES[relx.type], relx.offset, relx.symbol.bind == BIND_WEAK ? "\x1b[33m" : "\x1b[32m", relx.symbol.value.offset + elf_section(b, relx.symbol.section).data.offset, relx.symbol.type == SYM_DATA ? " \x1b[34mdata" : relx.symbol.type == SYM_COMMON ? " \x1b[32mdata" : relx.symbol.type == SYM_TLS ? " \x1b[35mdata" : relx.symbol.type == SYM_PROC ? " \x1b[34mproc" : relx.symbol.type == SYM_SECTION ? " \x1b[36msect" : relx.symbol.type == SYM_SPECIFIC ? " \x1b[34mspec" : " \x1b[33mnone", elf_name_from_offset(b, relx.symbol.name) ); } BX_LOG(log_level, " - -"); } break; default:; } } BX_LOG(log_level, ""); } void unit_write(Pool *pool, i64 unit, u16 target, i64 io_out, void *io_user_data) { BX_CHECK(pool != NULL && pool->entities != NULL, "Invalid arguments",); BX_CHECK(pool->entities[unit].is_enabled, "Unit does not exist",); BX_CHECK(pool->entities[unit].unit.entry_point_index != UNDEFINED, "No entry point",); BX_CHECK(target == (FORMAT_ELF | ARCH_X86_64), "Target not supported",); BX_CHECK(pool->obj_file_buffer != NULL, "No object file buffer",); BX_CHECK(pool->dependencies_buffer != NULL, "No dependencies buffer",); BX_CHECK(pool->obj_file_offsets != NULL, "No object file offsets buffer",); // ============================================================== // // Our program u8 code[32] = { 0xb8, 0x3c, 0x00, 0x00, 0x00, // mov eax, 60 0xbf, 0x2a, 0x00, 0x00, 0x00, // mov edi, 42 0x0f, 0x05, // syscall }; i64 num_program_headers = 1; i64 data_offset = bx_align(ELF_HEADER_SIZE + ELF_PROGRAM_HEADER_SIZE * num_program_headers, X86_64_ALIGNMENT); i64 code_size = bx_align(sizeof code, X86_64_ALIGNMENT); i64 entry_offset = 0; i64 base_address = X86_64_BASE_ADDRESS; i64 code_address = base_address + data_offset; i64 entry = code_address + entry_offset; Internal_Symbol_Entry sym_printf = {0}; i64 text_size = code_size; i64 data_size = 0; i64 bss_size = 0; i64 rodata_size = 0; // ============================================================== // // Reading dependencies i64 num_obj_files = 0; i64 obj_files_size = 0; i64 num_sections = 0; i64 num_symbols = 0; i64 not_found_size = 0; // Read all dependency files into the memory // Unit *u = &pool->entities[unit].unit; for (i64 link_index = 0; link_index < u->num_links; ++link_index) { i64 id = u->links[link_index]; if (id == UNDEFINED) continue; Unit *l = &pool->entities[id].unit; BX_CHECK(pool->entities[id].is_enabled, "Internal",); BX_CHECK(l->type == UNIT_LIBRARY_STATIC, "Link type not supported",); BX_CHECK(l->name_size > 0, "No link name",); BX_CHECK(l->name_size <= MAX_NAME_SIZE, "Link name too big",); i64 f = io_open_read(l->name_size, l->name, io_user_data); io_seek(f, 0, IO_SEEK_END, io_user_data); i64 in_size = io_tell(f, io_user_data); BX_CHECK(in_size <= pool->max_obj_file_size, "AR file too big",); io_seek(f, 0, IO_SEEK_BEGIN, io_user_data); i64 n = io_read(f, in_size, pool->obj_file_buffer, io_user_data); BX_CHECK(n == in_size, "Read failed",); io_close(f, io_user_data); // ======================================================== // // Read AR library u8 *ar_begin = pool->obj_file_buffer; u8 *ar_end = pool->obj_file_buffer + in_size; BX_CHECK(bx_mem_eq(ar_begin, AR_MAGIC, 8), "Invalid AR file",); u8 *f_begin = ar_begin + 8; while (f_begin + 60 < ar_end) { u8 *f_id = f_begin; u8 *f_size = f_begin + 48; u8 *f_end = f_begin + 58; u8 *f_data = f_begin + 60; i64 size = (i64) bx_u64_from_dec_str((c8 *) f_size, (c8 *) f_size + 10); size = bx_align(size, 2); BX_CHECK(bx_mem_eq(f_end, "\x60\x0a", 2), "Invalid AR file",); BX_CHECK(f_begin + size <= ar_end, "Buffer overflow",); if (!bx_mem_eq(f_id, AR_SYMBOL_TABLE, 16) && !bx_mem_eq(f_id, AR_STRING_TABLE, 16)) { // Read ELF object file i64 delta_size = bx_align(size, X86_64_ALIGNMENT); BX_CHECK(obj_files_size + delta_size < pool->max_dependencies_size, "Out of memory",); BX_CHECK(num_obj_files + 1 < pool->max_num_obj_files, "Out of memory",); bx_mem_cpy(pool->dependencies_buffer + obj_files_size, f_data, size); pool->obj_file_offsets[num_obj_files] = obj_files_size; obj_files_size += delta_size; pool->obj_file_offsets[++num_obj_files] = obj_files_size; } f_begin = f_data + size; } } // ========================================================== // // Calculate section offsets for (i64 elf_index = 0; elf_index < num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); elf_checks(buf); //elf_dump(VERBOSE, buf); Offset_Num headers = elf_section_headers(buf); for (i64 sec_index = 1; sec_index < headers.num; ++sec_index, ++num_sections) { BX_CHECK(num_sections < pool->max_num_sections, "Too many sections",); pool->section_addresses[num_sections] = 0; Section_Header section = elf_section(buf, sec_index); if (!section.alloc) continue; if (section.exec) { pool->section_addresses[num_sections] = base_address + data_offset + text_size; text_size += bx_align(section.data.size, X86_64_ALIGNMENT); } else if (section.write && section.type == SEC_NOBITS) { pool->section_addresses[num_sections] = base_address + data_offset + bss_size; bss_size += bx_align(section.data.size, X86_64_ALIGNMENT); } else if (section.write) { pool->section_addresses[num_sections] = base_address + data_offset + data_size; data_size += bx_align(section.data.size, X86_64_ALIGNMENT); } else if (section.data.size > 0) { pool->section_addresses[num_sections] = base_address + data_offset + rodata_size; rodata_size += bx_align(section.data.size, X86_64_ALIGNMENT); } else { BX_LAX(0, "Unsupported section type"); continue; } } } // ========================================================== // // Relocate defined symbols for (i64 elf_index = 0, sec_index_global = 0; elf_index < num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); Offset_Num headers = elf_section_headers(buf); Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; for (i64 sec_index = 1; sec_index < headers.num; ++sec_index) { Section_Header tab = elf_section(buf, sec_index); if (tab.type != SEC_SYMTAB) continue; for (i64 sym_index = 1; sym_index < tab.num_entries; ++sym_index) { Symbol_Entry sym = elf_symbol(buf, tab.data, strtab, sym_index); c8 * sym_name = elf_name_from_offset(buf, sym.name); if (sym.section == 0) // undefined symbol continue; i64 sym_section = sec_index_global + sym.section - 1; BX_CHECK(sym_section < pool->max_num_sections, "Buffer overflow",); i64 sym_address = pool->section_addresses[sym_section] + sym.value.offset; BX_CHECK(num_symbols < pool->max_num_symbols, "Too many symbols",); pool->symbols[num_symbols++] = (Internal_Symbol_Entry) { .name_size = sym.name.size, .name = sym_name, .section = sym_section, .address = sym_address, .size = sym.value.size, }; u8 *begin = buf.begin + tab.data.offset + sym_index * ELF_SYMBOL_ENTRY_SIZE; u8 *end = begin + tab.data.size; if (end > buf.end) end = buf.end; write_i64(LE, sym_address, begin + 8, end); } } sec_index_global += elf_section_headers(buf).num - 1; } // ========================================================== // // TODO Add runtime library symbols // // _DYNAMIC // _GLOBAL_OFFSET_TABLE_ // // _Unwind_Resume // _Unwind_Backtrace // _Unwind_ForcedUnwind // _Unwind_GetIP // _Unwind_GetCFA // // _init // _end // _fini // _dl_rtld_map // __ehdr_start // __pthread_initialize_minimal // __init_array_start // __init_array_end // __fini_array_start // __fini_array_end // __rela_iplt_start // __rela_iplt_end // __preinit_array_start // __preinit_array_end // __start___libc_atexit // __stop___libc_atexit // __stop___libc_IO_vtables // __start___libc_IO_vtables // __start___libc_subfreeres // __stop___libc_subfreeres // __start___libc_freeres_ptrs // __stop___libc_freeres_ptrs // __gcc_personality_v0 // // __addtf3 // __subtf3 // __multf3 // __divtf3 // __eqtf2 // __lttf2 // __letf2 // __gttf2 // __getf2 // __unordtf2 // ============================================================== // // TODO Apply relocations for (i64 elf_index = 0, sec_index_global = 0; elf_index < num_obj_files; ++elf_index) { Buffer_Context buf = elf_buffer_context(pool, num_obj_files, elf_index); i64 num_sections = elf_section_headers(buf).num; Offset_Size strtab = elf_find_section_by_name(buf, SECTION_STRTAB, sizeof SECTION_STRTAB - 1).data; Offset_Size symtab = elf_find_section_by_name(buf, SECTION_SYMTAB, sizeof SECTION_SYMTAB - 1).data; for (i64 sec_index = 1; sec_index < num_sections; ++sec_index, ++sec_index_global) { Section_Header src_sec = elf_section(buf, sec_index); if (src_sec.type != SEC_REL && src_sec.type != SEC_RELA) continue; i64 dst_index = elf_find_related_section_index(buf, sec_index); BX_CHECK(dst_index >= 0 && dst_index < pool->max_num_sections, "Buffer overflow",); for (i64 entry_index = 0; entry_index < src_sec.num_entries; ++entry_index) { Relx_Entry relx = (src_sec.type == SEC_REL) ? elf_rel (buf, symtab, strtab, src_sec.data, entry_index) : elf_rela(buf, symtab, strtab, src_sec.data, entry_index); c8 *sym_name = elf_name_from_offset(buf, relx.symbol.name); Internal_Symbol_Entry symbol = {0}; if (relx.symbol.section == 0) { for (i64 i = 0; i < num_symbols; ++i) if (pool->symbols[i].name_size == relx.symbol.name.size && bx_mem_eq( pool->symbols[i].name, sym_name, relx.symbol.name.size )) { symbol = pool->symbols[i]; break; } if (symbol.name_size == 0 && bx_find_str_in_table( pool->not_found_buffer, pool->not_found_buffer + not_found_size, sym_name, sym_name + relx.symbol.name.size ) == NULL) { BX_LOG(WARNING, "Symbol not found: %s", sym_name); BX_CHECK(not_found_size + relx.symbol.name.size + 1 <= pool->max_not_found_size, "Out of memory",); bx_mem_cpy(pool->not_found_buffer + not_found_size, sym_name, relx.symbol.name.size); not_found_size += relx.symbol.name.size + 1; } } else symbol = (Internal_Symbol_Entry) { .section = sec_index_global, .address = relx.symbol.value.offset, .size = relx.symbol.value.size, }; u8 *dst = buf.begin + elf_section(buf, dst_index).data.offset + relx.offset; // Represents the addend used to compute the value of the relocatable field. i64 A = relx.addent; // Represents the base address at which a shared object has been loaded into memory during execution. Generally, a shared object is built with a 0 base virtual address, but the execution address will be different. i64 B = pool->section_addresses[dst_index]; // Represents the offset into the global offset table at which the relocation entry's symbol will reside during execution. i64 G = symbol.address - base_address - data_offset; // Represents the address of the global offset table. i64 GOT = base_address + data_offset; // Represents the place (section offset or address) of the Procedure Linkage Table entry for a symbol. i64 L = base_address + data_offset; // Represents the place (section offset or address) of the storage unit being relocated (computed using r_offset). i64 P = pool->section_addresses[dst_index] + relx.offset; // Represents the value of the symbol whose index resides in the relocation entry. i64 S = symbol.address; // The size of the symbol whose index resides in the relocation entry. i64 Z = symbol.size; switch (relx.type) { #define ADD_(BITS, OP) \ do { \ i64 x_ = read_i##BITS(LE, dst, buf.end) + (OP); \ write_i##BITS(LE, (i##BITS) x_, dst, buf.end); \ } while (0) #define TODO_ BX_FAIL("Not implemented",) case R_X86_64_NONE: /* Do nothing */ break; case R_X86_64_64: ADD_(64, S + A); break; case R_X86_64_PC32: ADD_(32, S + A - P); break; case R_X86_64_GOT32: ADD_(32, G + A); break; case R_X86_64_PLT32: ADD_(32, L + A - P); break; case R_X86_64_COPY: /* Do nothing */ break; case R_X86_64_GLOB_DAT: ADD_(64, S); break; case R_X86_64_JUMP_SLOT: ADD_(64, S); break; case R_X86_64_RELATIVE: ADD_(64, B + A); break; case R_X86_64_GOTPCREL: ADD_(32, G + GOT + A - P); break; case R_X86_64_32: ADD_(32, S + A); break; case R_X86_64_32S: ADD_(32, S + A); break; case R_X86_64_16: ADD_(16, S + A); break; case R_X86_64_PC16: ADD_(16, S + A - P); break; case R_X86_64_8: ADD_(8, S + A); break; case R_X86_64_PC8: ADD_(8, S + A - P); break; case R_X86_64_DTPMOD64: TODO_; break; case R_X86_64_DTPOFF64: TODO_; break; case R_X86_64_TPOFF64: TODO_; break; case R_X86_64_TLSGD: TODO_; break; case R_X86_64_TLSLD: TODO_; break; case R_X86_64_DTPOFF32: TODO_; break; case R_X86_64_GOTTPOFF: ADD_(32, S - GOT); break; case R_X86_64_TPOFF32: ADD_(32, S + A - B); break; case R_X86_64_PC64: ADD_(64, S + A - P); break; case R_X86_64_GOTOFF64: TODO_; break; case R_X86_64_GOTPC32: ADD_(32, GOT + A - P); break; case R_X86_64_GOT64: TODO_; break; case R_X86_64_GOTPCREL64: TODO_; break; case R_X86_64_GOTPC64: ADD_(64, GOT + A - P); break; case R_X86_64_GOTPLT64: TODO_; break; case R_X86_64_PLTOFF64: TODO_; break; case R_X86_64_SIZE32: ADD_(32, Z + A); break; case R_X86_64_SIZE64: ADD_(64, Z + A); break; case R_X86_64_GOTPC32_TLSDESC: TODO_; break; case R_X86_64_TLSDESC_CALL: TODO_; break; case R_X86_64_TLSDESC: TODO_; break; case R_X86_64_IRELATIVE: TODO_; break; case R_X86_64_RELATIVE64: TODO_; break; case R_X86_64_GOTPCRELX: TODO_; break; case R_X86_64_REX_GOTPCRELX: ADD_(32, GOT - P + G - 4); break; default: BX_LAX(0, "Unknown relocation type"); #undef ADD_ #undef TODO_ } } } } // ============================================================== // // TODO Search symbols for (i64 i = 0; i < num_symbols; ++i) if (pool->symbols[i].name_size == 6 && bx_mem_eq(pool->symbols[i].name, "printf", 6)) { sym_printf = pool->symbols[i]; break; } // ============================================================== // // TODO Write sections into the output buffer // ============================================================== // // Writing the ELF executable // // TODO Write into memory. BX_LOG(VERBOSE, "Total %lld sections", num_sections); BX_LOG(VERBOSE, "Total %lld symbols", num_symbols); BX_LOG(VERBOSE, "Total size"); BX_LOG(VERBOSE, ".text - %lld bytes", text_size); BX_LOG(VERBOSE, ".bss - %lld bytes", bss_size); BX_LOG(VERBOSE, ".data - %lld bytes", data_size); BX_LOG(VERBOSE, ".rodata - %lld bytes", rodata_size); BX_CHECK(sym_printf.name_size != 0, "Symbol not found: printf",); BX_LOG(VERBOSE, "Found printf: %08llx", sym_printf.address); BX_LOG(VERBOSE, "Writing ELF x86_64 executable"); #define WRITE(x, n) io_write( io_out, n, x, io_user_data ) #define WRITE_V(...) io_write( io_out, sizeof((u8[]) {__VA_ARGS__}), (u8[]) {__VA_ARGS__}, io_user_data ) #define WRITE_DUP(x, n) io_write( io_out, n, (u8[n]) { 0 }, io_user_data ) #define WRITE_2(x) io_write( io_out, 2, &(u16) { x }, io_user_data ) #define WRITE_4(x) io_write( io_out, 4, &(u32) { x }, io_user_data ) #define WRITE_8(x) io_write( io_out, 8, &(u64) { x }, io_user_data ) // ELF header // WRITE ( ELF_MAGIC, 4 ); WRITE_V( ELF_64 ); WRITE_V( ELF_2_LE ); WRITE_V( ELF_VERSION ); WRITE_V( ELF_SYS_V ); WRITE_V( ELF_ABI_VERSION ); WRITE_DUP(0, 7); // padding WRITE_2( ELF_EXECUTABLE ); WRITE_2( ELF_X86_64 ); WRITE_4( ELF_VERSION ); WRITE_8( entry ); WRITE_8( ELF_HEADER_SIZE ); WRITE_8( 0 ); // section header offset WRITE_4( 0 ); // flags WRITE_2( ELF_HEADER_SIZE ); WRITE_2( ELF_PROGRAM_HEADER_SIZE ); WRITE_2( 1 ); // num program headers WRITE_2( 0 ); // section header size WRITE_2( 0 ); // num section headers WRITE_2( 0 ); // string table section header index // Program header // WRITE_4( 1 ); // type (PT_LOAD) WRITE_4( 5 ); // flags (PF_X | PF_R) WRITE_8( data_offset ); WRITE_8( code_address ); // virtual address WRITE_8( code_address ); // phisical address WRITE_8( code_size ); // size in file WRITE_8( code_size ); // size in memory WRITE_8( X86_64_ALIGNMENT ); // Code // for (i64 i = data_offset - ELF_HEADER_SIZE - ELF_PROGRAM_HEADER_SIZE; i > 0; --i) WRITE_V( 0 ); WRITE( code, code_size ); #undef WRITE_V #undef WRITE_DUP #undef WRITE_32 #undef WRITE_64 #undef WRITE // ============================================================== // // Cleanup bx_mem_set(pool->obj_file_buffer, 0, pool->max_obj_file_size); bx_mem_set(pool->dependencies_buffer, 0, pool->max_dependencies_size); bx_mem_set(pool->obj_file_offsets, 0, pool->max_num_obj_files * sizeof *pool->obj_file_offsets); bx_mem_set(pool->section_addresses, 0, pool->max_num_sections * sizeof *pool->section_addresses); bx_mem_set(pool->symbols, 0, pool->max_num_symbols * sizeof *pool->symbols); } i64 io_open_read(i64 name_size, c8 *name, void *user_data) { i64 f; io_dispatch(IO_OPEN_READ, &f, &name_size, name, user_data); return f; } i64 io_open_write(i64 name_size, c8 *name, void *user_data) { i64 f; io_dispatch(IO_OPEN_WRITE, &f, &name_size, name, user_data); return f; } void io_close(i64 f, void *user_data) { io_dispatch(IO_CLOSE, &f, NULL, NULL, user_data); } b8 io_seek(i64 f, i64 offset, u16 origin, void *user_data) { io_dispatch(IO_SEEK, &f, &offset, &origin, user_data); return 1; } i64 io_tell(i64 f, void *user_data) { i64 offset; io_dispatch(IO_TELL, &f, &offset, NULL, user_data); return offset; } i64 io_read(i64 f, i64 size, void *data, void *user_data) { io_dispatch(IO_READ, &f, &size, data, user_data); return size; } i64 io_write(i64 f, i64 size, void *data, void *user_data) { io_dispatch(IO_WRITE, &f, &size, data, user_data); return size; } void io_chmod_exe(i64 f, void *user_data) { io_dispatch(IO_CHMOD_EXE, &f, NULL, NULL, user_data); } // ================================================================ // // * Helper procedures // // ================================================================ #if HELPERS #include #include #include #include #ifdef __unix__ #include #include #endif void wait_any_input(void) { while (getc(stdin) != '\n'); fflush(stdin); } void bx_log(i32 log_level, u32 line, c8 *file, c8 *format, ...) { if (file == NULL || format == NULL) return; if (*format == '\0' && log_level != TRACE) { fprintf(log_level == ERROR || log_level == WARNING ? stderr : stdout, "\n"); return; } c8 message[256]; va_list ap; va_start(ap, format); vsprintf(message, format, ap); va_end(ap); fflush(stdout); i32 len = 56 - (i32) bx_str_len_or(message, message + 56, 56); switch (log_level) { case ERROR: fprintf(stderr, "\r\x1b[41;1m\x1b[30m Error \x1b[40;0m\x1b[37m %s " "%.*s \x1b[36m%s\x1b[34m:%d\x1b[37m\n", message, len, "................................................................", file, line); if (LOG_BLOCKING) wait_any_input(); break; case WARNING: fprintf(stderr, "\r\x1b[43;1m\x1b[30m Warning \x1b[40;0m\x1b[37m %s " "%.*s \x1b[36m%s\x1b[34m:%d\x1b[37m\n", message, len, "................................................................", file, line); if (LOG_BLOCKING) wait_any_input(); break; case INFO: fprintf(stdout, "\r\x1b[42;1m\x1b[30m Info \x1b[40;0m\x1b[37m %s\n", message); break; case VERBOSE: fprintf(stdout, "\r\x1b[47;1m\x1b[30m Verbose \x1b[40;0m\x1b[37m %s\n", message); break; case TRACE: fprintf(stdout, "\r\x1b[45;1m\x1b[30m Trace \x1b[40;0m\x1b[37m %s " "%.*s \x1b[36m%s\x1b[34m:%d\x1b[37m\n", message, len, "................................................................", file, line); if (TRACE_BLOCKING) wait_any_input(); break; default:; } } void bx_assert(b8 condition, c8 *message, u32 line, c8 *file) { if (condition) return; bx_log(ERROR, line, file, message); exit(-1); } // IO dispatch procedure // void io_dispatch(i16 op, i64 *id, i64 *size, void *data, void *user_data) { BX_CHECK(id != NULL, "Invalid arguments",); (void) user_data; FILE **f = (FILE **) id; c8 buf[MAX_NAME_SIZE] = { 0 }; switch (op) { case IO_OPEN_READ: case IO_OPEN_WRITE: BX_CHECK(size != NULL, "Invalid arguments",); BX_CHECK(*size > 0 && *size < MAX_NAME_SIZE, "Invalid arguments",); BX_CHECK(data != NULL, "Invalid arguments",); bx_mem_cpy(buf, data, *size); *f = fopen(buf, op == IO_OPEN_READ ? "rb" : "wb"); BX_CHECK(*f != NULL, "File open failed",); break; case IO_CLOSE: BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size == NULL, "Invalid arguments",); BX_CHECK(data == NULL, "Invalid arguments",); fclose(*f); break; case IO_SEEK: { BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size != NULL, "Invalid arguments",); BX_CHECK(data != NULL, "Invalid arguments",); u16 *origin = (u16 *) data; if (!(*origin == IO_SEEK_CURSOR && *size == 0)) { BX_CHECK(*origin == IO_SEEK_CURSOR || *origin == IO_SEEK_BEGIN || *origin == IO_SEEK_END, "Invalid arguments",); i32 s = fseek(*f, *size, *origin == IO_SEEK_CURSOR ? SEEK_CUR : *origin == IO_SEEK_BEGIN ? SEEK_SET : SEEK_END); BX_CHECK(s == 0, "File seek failed",); } } break; case IO_TELL: { BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size != NULL, "Invalid arguments",); BX_CHECK(data == NULL, "Invalid arguments",); i64 n = (i64) ftell(*f); BX_CHECK(n >= 0, "File tell failed",); *size = n; } break; case IO_READ: BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size != NULL, "Invalid arguments",); BX_CHECK(data != NULL, "Invalid arguments",); BX_CHECK(*size > 0, "Invalid arguments",); *size = fread(data, 1, *size, *f); break; case IO_WRITE: BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size != NULL, "Invalid arguments",); BX_CHECK(data != NULL, "Invalid arguments",); BX_CHECK(*size > 0, "Invalid arguments",); *size = fwrite(data, 1, *size, *f); break; case IO_CHMOD_EXE: BX_CHECK(*f != NULL, "Invalid arguments",); BX_CHECK(size == NULL, "Invalid arguments",); #ifdef __unix__ fchmod(fileno(*f), 0775); #endif break; default: BX_FAIL("Invalid arguments",); } } // Global state // Pool g_pool = { // Statically allocate a large memory block. // TODO Reallocate the memory block when necessary. .capacity = MAX_NUM_ENTITIES, .entities = (Entity[MAX_NUM_ENTITIES]) {0}, .max_obj_file_size = MAX_OBJECT_FILE_SIZE, .max_dependencies_size = MAX_DEPENDENCIES_SIZE, .max_num_obj_files = MAX_NUM_OBJECT_FILES, .max_num_sections = MAX_NUM_SECTIONS, .max_num_symbols = MAX_NUM_SYMBOLS, .max_not_found_size = MAX_NOT_FOUND_SIZE, .obj_file_buffer = (u8[MAX_OBJECT_FILE_SIZE]) {0}, .dependencies_buffer = (u8[MAX_DEPENDENCIES_SIZE]) {0}, .obj_file_offsets = (i64[MAX_NUM_OBJECT_FILES]) {0}, .section_addresses = (i64[MAX_NUM_SECTIONS]) {0}, .symbols = (Internal_Symbol_Entry[MAX_NUM_SYMBOLS]) {0}, .not_found_buffer = (c8[MAX_NOT_FOUND_SIZE]) {0}, }; // Handy procedures // i64 n_i64(i64 value) { return node_data_i64(&g_pool, value); } i64 n_call(i16 convention, i64 target_proc, i64 num_args, Var *args) { return node_ctrl_call(&g_pool, convention, target_proc, num_args, args); } i64 n_call_by_name(i16 convention, c8 *name, i64 num_args, Var *args) { return node_ctrl_call_by_name(&g_pool, convention, bx_str_len(name, name + MAX_NAME_SIZE), name, num_args, args); } i64 n_ret(i64 num_vals, Var *vals) { return node_ctrl_ret(&g_pool, num_vals, vals); } i64 p_new(c8 *name) { i64 p = proc_init(&g_pool); proc_set_name(&g_pool, p, bx_str_len(name, name + MAX_NAME_SIZE), name); return p; } void p_add(i64 proc, i64 node) { proc_node_add(&g_pool, proc, node); } i64 u_new() { return unit_init(&g_pool, UNIT_CODE); } void u_add(i64 unit, i64 proc) { unit_proc_add(&g_pool, unit, proc); } void u_entry_point(i64 unit, i64 proc) { unit_set_entry_point(&g_pool, unit, proc); } void u_elf_x86_64(i64 unit, c8 *output_file_name) { i64 out = io_open_write(bx_str_len(output_file_name, output_file_name + MAX_PATH_SIZE), output_file_name, NULL); unit_write(&g_pool, unit, FORMAT_ELF | ARCH_X86_64, out, NULL); io_chmod_exe(out, NULL); io_close(out, NULL); } void l_code(i64 unit, i64 link_unit) { unit_link_add(&g_pool, unit, link_unit); } void l_object(i64 unit, c8 *object_library) { i64 l = unit_init(&g_pool, UNIT_LIBRARY_OBJECT); unit_set_name(&g_pool, l, bx_str_len(object_library, object_library + MAX_PATH_SIZE), object_library); unit_link_add(&g_pool, unit, l); } void l_static(i64 unit, c8 *static_library) { i64 l = unit_init(&g_pool, UNIT_LIBRARY_STATIC); unit_set_name(&g_pool, l, bx_str_len(static_library, static_library + MAX_PATH_SIZE), static_library); unit_link_add(&g_pool, unit, l); } #endif // ================================================================ // // EXAMPLE // // ================================================================ #if HELPERS && TESTING int main(int argc, char **argv) { (void) argc; (void) argv; BX_LOG(INFO, "bxgen " BX_VERSION); // Add the `main` procedure. i64 main = p_new("main"); // Create `42` i64 data node and add it into the `main`. i64 n0 = n_i64(42); p_add(main, n0); // Add a return statement into the `main`. // The return statement will point at the previously // created node `42`. p_add(main, // Create a ret node. n_ret( 1, // number of returned values (Var[]) {{ // the return value .num = 1, // number of elements .type = TYPE_I32, // type .node = n0, // source to get the value from }} )); // Add a compilation unit. i64 u = u_new(); // Add the `main` procedure to the compilation unit. u_add(u, main); // Set the `main` as the entry point of the compilation unit. u_entry_point(u, main); // NOTE // Codegen is not implemented so the example above won't be compiled into binary. // Instead, some dummy binary will be generated. // Link a static library. l_static(u, "/lib/x86_64-linux-gnu/libc.a"); // l_static(u, "libtest.a"); // Write the compilation unit into an executable file. u_elf_x86_64(u, "test_foo"); BX_CHECK(HOST == HOST_Linux, "Host system is not compatible", -1); BX_CHECK(HO == LE, "Host data ordering is not compatible", -1); // Run the created executable file. i32 ret = system("./test_foo"); BX_CHECK(WEXITSTATUS(ret) == 42, "Failure", -1); BX_LOG(INFO, "Bye!"); return 0; } #endif #endif