diff options
author | Mitya Selivanov <automainint@guattari.tech> | 2025-02-14 11:23:59 +0100 |
---|---|---|
committer | Mitya Selivanov <automainint@guattari.tech> | 2025-02-14 11:23:59 +0100 |
commit | 236a55c6e3c6554a8ff68a9ca17053dd2c4e9cee (patch) | |
tree | 3dfe15e6fbe2e4ca1c7fabc2391efde54b67c7e8 | |
parent | cf25a8b0c9cb58baf8dccfe353a733777071fe4e (diff) | |
download | reduced_system_layer-236a55c6e3c6554a8ff68a9ca17053dd2c4e9cee.zip |
Copy pixels perf
-rwxr-xr-x | graphics.c | 26 | ||||
-rwxr-xr-x | reduced_system_layer.c | 172 |
2 files changed, 110 insertions, 88 deletions
@@ -311,20 +311,18 @@ void draw_pixels_to_buffer(Pixel_Buffer dst, Box area, Pixel_Buffer src) { if (j0 < 0) j0 = 0; if (j1 > dst.height) j1 = dst.height; - f64 w_coeff = src.width / area.width; - f64 h_coeff = src.height / area.height; - - for (i64 j = j0; j < j1; ++j) { - i64 src_j = (i64) floor((j - area.y + .5) * h_coeff); - if (src_j < 0 || src_j >= src.height) continue; - vec4_f32 *q = src.pixels + src_j * src.stride; - vec4_f32 *p = dst.pixels + j * dst.stride + i0; - for (i64 i = i0; i < i1; ++i) { - i64 src_i = (i64) floor((i - area.x + .5) * w_coeff); - if (src_i < 0 || src_i >= src.width) continue; - put_pixel_(p, q[src_i]); - ++p; - } + f64 di = src.width / area.width; + f64 dj = src.height / area.height; + f64 jj = (j0 - area.y) * dj; + for (i64 j = j0; j < j1; ++j, jj += dj) { + if (jj < 0 || jj >= src.height) continue; + vec4_f32 *d = dst.pixels + j * dst.stride + i0; + vec4_f32 *d_end = d + i1 - i0; + vec4_f32 *s = src.pixels + (i64) jj * src.stride; + f64 ii = (i0 - area.x) * di; + if (ii < 0 || ii >= src.width) continue; + for (; d < d_end; ++d, ii += di) + put_pixel_(d, s[(i64) ii]); } } diff --git a/reduced_system_layer.c b/reduced_system_layer.c index 5079f4d..5e0ef67 100755 --- a/reduced_system_layer.c +++ b/reduced_system_layer.c @@ -218,7 +218,7 @@ typedef struct { f64 v[16]; } mat4; #define REDUCED_SYSTEM_LAYER_HEADER_GUARD_ #ifdef EVERY_TEST_SUITE -#define GRAPHICS_TEST_SUITE +#define REDUCED_SYSTEM_LAYER_TEST_SUITE #endif #ifdef __cplusplus @@ -253,7 +253,7 @@ i32 main(i32 argc, c8 **argv); #endif #ifndef STATIC_MEMORY_BUFFER_SIZE -#define STATIC_MEMORY_BUFFER_SIZE (40 * 1024 * 1024) +#define STATIC_MEMORY_BUFFER_SIZE (80 * 1024 * 1024) #endif #ifndef MEMORY_CHUNK_SIZE @@ -261,7 +261,7 @@ i32 main(i32 argc, c8 **argv); #endif #ifndef DEFAULT_ANTIALIASING_SCALE -#define DEFAULT_ANTIALIASING_SCALE 4 +#define DEFAULT_ANTIALIASING_SCALE 2 #endif #ifndef MIN_PIXEL_SIZE @@ -605,7 +605,7 @@ typedef struct { i32 frame_width; i32 frame_height; f64 pixel_size; - i32 antialiasing_scale; // TODO: Global anti-aliasing. + i32 antialiasing_scale; b8 exact_resolution : 1; b8 graceful_shutdown : 1; @@ -735,16 +735,24 @@ void run_main_window_event_loop(void) { #endif } -static void mem_set_(void *dst, u8 x, u32 size) { - for (u32 i = 0; i < size; ++i) +static void mem_set_(void *dst, u8 x, i64 size) { + for (i64 i = 0; i < size; ++i) ((u8 *) dst)[i] = x; } -static void mem_cpy_(void *dst, void *src, u32 size) { - for (u32 i = 0; i < size; ++i) +static void mem_cpy_(void *dst, void *src, i64 size) { + for (i64 i = 0; i < size; ++i) ((u8 *) dst)[i] = ((u8 *) src)[i]; } +static i32 min2_i32_(i32 x, i32 y) { + return x < y ? x : y; +} + +static i64 max2_i64_(i64 x, i64 y) { + return x > y ? x : y; +} + // ================================================================ // // Log @@ -841,10 +849,18 @@ void *memory_buffer_allocate(i64 size, i64 alignment, i64 previous_size, void *p i64 chunk = (g_platform.memory_buffer_size + MEMORY_CHUNK_SIZE - 1) / MEMORY_CHUNK_SIZE; // Search free space - i64 i0 = 0; - while (i0 < occupied_len_bits && occupied[i0 / 64] == ~0ull) - i0 += 64; - for (i64 i = i0; i < occupied_len_bits; ++i) { + for (i64 i = 0;; ++i) { + while ((i % 64) == 0 + && i < occupied_len_bits + && (i + 64 <= prev_chunk || i >= prev_chunk + prev_num_chunks) + && occupied[i / 64] == ~0ull) + i += 64; + while ((i % 8) == 0 + && i < occupied_len_bits + && (i + 8 <= prev_chunk || i >= prev_chunk + prev_num_chunks) + && ((u8 *) occupied)[i / 8] == 255u) + i += 8; + if (i >= occupied_len_bits) break; b8 is_occupied = 0; for (i64 j = i; j < i + num_chunks; ++j) { if (j >= prev_chunk && j < prev_chunk + prev_num_chunks) continue; @@ -900,9 +916,11 @@ void resize_dynamic_array_exact(i64 *num, void **data, i64 element_size, i64 new if (*num == new_num) return; + i64 alignment = max2_i64_(8, element_size); + void *new_data = memory_buffer_allocate( new_num * element_size, - element_size, + alignment, *num * element_size, *data ); @@ -1479,15 +1497,7 @@ static void drop_files_set_data_(i64 index, i64 data_size) { resize_dynamic_array_exact(&f->data_size, (void **) &f->data, 1, data_size); } -static i32 min2_i32_(i32 x, i32 y) { - return x < y ? x : y; -} - -static i32 max2_i32_(i32 x, i32 y) { - return x > y ? x : y; -} - -static i8 pixel_size_update_(i32 real_width, i32 real_height) { +static i8 pixel_size_update_(i64 real_width, i64 real_height) { i8 size_changed = 0; if (g_platform.antialiasing_scale <= 0) g_platform.antialiasing_scale = DEFAULT_ANTIALIASING_SCALE; @@ -1495,8 +1505,8 @@ static i8 pixel_size_update_(i32 real_width, i32 real_height) { if (g_platform.pixel_size < MIN_PIXEL_SIZE) g_platform.pixel_size = MIN_PIXEL_SIZE; if (g_platform.pixel_size > MAX_PIXEL_SIZE) g_platform.pixel_size = MAX_PIXEL_SIZE; - i32 width = (i32) floor(((f64) real_width) / g_platform.pixel_size + .5) * g_platform.antialiasing_scale; - i32 height = (i32) floor(((f64) real_height) / g_platform.pixel_size + .5) * g_platform.antialiasing_scale; + i64 width = (i64) floor(((f64) real_width) / g_platform.pixel_size + .5) * g_platform.antialiasing_scale; + i64 height = (i64) floor(((f64) real_height) / g_platform.pixel_size + .5) * g_platform.antialiasing_scale; if (g_platform.real_width != real_width || g_platform.real_height != real_height) { size_changed = 1; @@ -1505,6 +1515,8 @@ static i8 pixel_size_update_(i32 real_width, i32 real_height) { } resize_dynamic_array_exact(&g_platform.num_pixels, (void **) &g_platform.pixels, sizeof *g_platform.pixels, width * height); + if (g_platform.num_pixels < width * height) + LOG_ERROR("Failed to allocate %lld x %lld pixel buffer.", width, height); height = g_platform.num_pixels / width; if (g_platform.frame_width != width || g_platform.frame_height != height) { @@ -1513,10 +1525,12 @@ static i8 pixel_size_update_(i32 real_width, i32 real_height) { g_platform.frame_height = height; } - i32 internal_width = max2_i32_(real_width, width / g_platform.antialiasing_scale); - i32 internal_height = max2_i32_(real_height, height / g_platform.antialiasing_scale); + i64 internal_width = max2_i64_(real_width, width / g_platform.antialiasing_scale); + i64 internal_height = max2_i64_(real_height, height / g_platform.antialiasing_scale); resize_dynamic_array_exact(&_internal_pixels_len, (void **) &_internal_pixels, sizeof *_internal_pixels, internal_width * internal_height); + if (_internal_pixels_len < internal_width * internal_height) + LOG_ERROR("Failed to allocate %lld x %lld internal pixel buffer.", internal_width, internal_height); _internal_width = real_width; _internal_height = min2_i32_(_internal_pixels_len / real_width, real_height); @@ -1562,7 +1576,7 @@ static void convert_pixels_for_window_(void) { f32 k = 1.f / (aa_scale * aa_scale); - for (i64 j = 0; j < dst_width; ++j) { + for (i64 j = 0; j < dst_height; ++j) { vec4_f32 *s = g_platform.pixels + j * src_stride; u32 *d = _internal_pixels + j * dst_width; u32 *d_end = d + dst_width; @@ -1609,16 +1623,24 @@ static void convert_pixels_for_window_(void) { return; } - i64 half_w = dst_width / 2; - i64 half_h = dst_height / 2; + if (src_len > dst_len) { + LOG_ERROR("Sanity"); + return; + } - // FIXME, PERF: Use pointers, check if src_len is less than dst_len. - for (i64 j = dst_height - 1; j >= 0; --j) - for (i64 i = dst_width - 1; i >= 0; --i) { - i64 n = j * dst_width + i; - i64 nn = ((j * src_height) / dst_height) * src_width + (i * src_width) / dst_width; - _internal_pixels[n] = _internal_pixels[nn]; - } + f64 di = ((f64) src_width) / dst_width; + f64 dj = ((f64) src_height) / dst_height; + f64 jj = src_height - dj; + for (i64 j = dst_height - 1; j >= 0; --j, jj -= dj) { + if (jj < 0 || jj >= src_height) continue; + u32 *d_first = _internal_pixels + j * dst_width; + u32 *d = d_first + dst_width - 1; + u32 *s = _internal_pixels + (i64) jj * src_width; + f64 ii = src_width - di; + if (ii < 0 || ii >= src_width) continue; + for (; d >= d_first; --d, ii -= di) + *d = s[(i64) ii]; + } } } @@ -3555,21 +3577,20 @@ b8 x11_screenshot_(i64 max_num_pixels, i64 *width, i64 *height, vec4_f32 *pixels *width = image->width; *height = image->height; - if (pixels == NULL || image->width * image->height > max_num_pixels) - return 1; + if (pixels != NULL && image->width * image->height <= max_num_pixels) + for (i64 j = 0; j < image->height; ++j) + for (i64 i = 0; i < image->width; ++i) { + vec3_f32 f = rgb_f32_from_u32_(XGetPixel(image, i, j)); - for (i64 j = 0; j < image->height; ++j) - for (i64 i = 0; i < image->width; ++i) { - vec3_f32 f = rgb_f32_from_u32_(XGetPixel(image, i, j)); - - pixels[j * image->width + i] = (vec4_f32) { - .x = f.x, - .y = f.y, - .z = f.z, - .w = 1.f, - }; - } + pixels[j * image->width + i] = (vec4_f32) { + .x = f.x, + .y = f.y, + .z = f.z, + .w = 1.f, + }; + } + XDestroyImage(image); return 1; } @@ -3734,26 +3755,26 @@ __attribute__((export_name("js_num_sound_channels"))) i32 js_num_sound_channels( #if defined(__wasm__) -static i32 _frame_width = 0; -static i32 _frame_height = 0; -static i32 _num_events = 0; -static i32 _input_size = 0; -static i32 _cursor_dx = 0; -static i32 _cursor_dy = 0; -static f64 _wheel_dx = 0; -static f64 _wheel_dy = 0; -static b8 _wait_events = 0; -static i64 _timeout = 0; -static i64 _sound_read = 0; -static b8 _files_dragged = 0; -static b8 _files_dropped = 0; - -static c8 _href [4096] = {0}; -static u32 _pixels_scaled [MAX_NUM_PIXELS] = {0}; -static u32 _internal_pixels [MAX_NUM_PIXELS] = {0}; -static b8 _key_pressed [MAX_NUM_KEYS] = {0}; -static f32 _sound_buffer [MAX_NUM_PRIMARY_SOUND_FRAMES] = {0}; -static u8 _drop_buffer [DROP_FILES_BUFFER_SIZE] = {0}; +static i32 _frame_width = 0; +static i32 _frame_height = 0; +static i32 _num_events = 0; +static i32 _input_size = 0; +static i32 _cursor_dx = 0; +static i32 _cursor_dy = 0; +static f64 _wheel_dx = 0; +static f64 _wheel_dy = 0; +static b8 _wait_events = 0; +static i64 _timeout = 0; +static i64 _sound_read = 0; +static b8 _files_dragged = 0; +static b8 _files_dropped = 0; + +static c8 _href [4096] = {0}; +static u32 _pixels_scaled [MAX_NUM_PIXELS] = {0}; +static u32 _internal_pixels [MAX_NUM_PIXELS] = {0}; +static b8 _key_pressed [MAX_NUM_KEYS] = {0}; +static f32 _sound_buffer [MAX_NUM_PRIMARY_SOUND_FRAMES] = {0}; +static u8 _drop_buffer [DROP_FILES_BUFFER_SIZE] = {0}; void shutdown_all_systems(void) { g_platform.done = 1; @@ -3818,11 +3839,11 @@ i32 handle_main_window_events(void) { g_platform.files_dragged = _files_dragged; g_platform.files_dropped = _files_dropped; - _input_size = 0; - _cursor_dx = 0; - _cursor_dy = 0; - _wheel_dx = 0; - _wheel_dy = 0; + _input_size = 0; + _cursor_dx = 0; + _cursor_dy = 0; + _wheel_dx = 0; + _wheel_dy = 0; if (_files_dropped) { _files_dragged = 0; @@ -4123,6 +4144,9 @@ __attribute__((export_name("js_drop"))) void js_drop(i32 name_len, i32 data_size #define TEST_FILE reduced_system_layer #include "test.c" +TEST("TODO") { +} + #ifndef EVERY_TEST_SUITE void update_and_render_frame(void) {} |