Micro-optimize clearing of lines

Use a doubling strategy to fill arrays with a fixed value. This reduces the
number of memcpy calls needed to fill N elements from O(N) to O(log(N)); the
number of bytes written is unchanged.
This commit is contained in:
Kovid Goyal 2024-01-20 13:19:09 +05:30
parent d0621cb82a
commit 06da31019c
No known key found for this signature in database
GPG key ID: 06BC317B515ACE7C
3 changed files with 18 additions and 10 deletions

View file

@ -427,3 +427,13 @@ SPRITE_MAP_HANDLE alloc_sprite_map(unsigned int, unsigned int);
SPRITE_MAP_HANDLE free_sprite_map(SPRITE_MAP_HANDLE);
const char* get_hyperlink_for_id(const HYPERLINK_POOL_HANDLE, hyperlink_id_type id, bool only_url);
void log_event(const char *format, ...) __attribute__((format(printf, 1, 2)));
/*
 * memset_array(array, val, count): set the first `count` elements of `array`
 * to `val`.
 *
 * The first element is assigned directly; after that the already-filled
 * prefix is copied onto the unfilled remainder, doubling the filled length on
 * each pass, so the number of memcpy calls grows with log2(count).
 *
 * - Does nothing when `count` is zero or negative.
 * - `val` is evaluated once and converted to the element type by assignment.
 *   Copy sizes are derived from sizeof((array)[0]), not sizeof(val), so a
 *   value of a different type (e.g. an int literal stored into a uint16_t
 *   array) cannot cause writes past the requested range.
 * - `array` and `count` are each evaluated twice: pass expressions without
 *   side effects.
 * - Expands to a single statement (do { } while (0)), so it is safe as the
 *   body of an unbraced if/else.
 */
#define memset_array(array, val, count) do { \
    if ((count) > 0) { \
        const size_t memset_array_total_ = (size_t)(count); \
        const size_t memset_array_elem_ = sizeof((array)[0]); \
        unsigned char *const memset_array_base_ = (unsigned char *)(array); \
        size_t memset_array_done_ = 1; \
        (array)[0] = (val); \
        while (memset_array_done_ < memset_array_total_) { \
            const size_t memset_array_left_ = memset_array_total_ - memset_array_done_; \
            /* Never copy more than is already filled: keeps source and destination disjoint. */ \
            const size_t memset_array_step_ = memset_array_done_ < memset_array_left_ ? memset_array_done_ : memset_array_left_; \
            memcpy(memset_array_base_ + memset_array_done_ * memset_array_elem_, memset_array_base_, memset_array_step_ * memset_array_elem_); \
            memset_array_done_ += memset_array_step_; \
        } \
    } \
} while (0)

View file

@ -154,7 +154,7 @@ linebuf_clear_lines(LineBuf *self, const Cursor *cursor, index_type start, index
#define lineptr(which, i) which##_lineptr(self, self->line_map[i])
GPUCell *first_gpu_line = lineptr(gpu, start);
const GPUCell gc = cursor_as_gpu_cell(cursor);
for (index_type i = 0; i < self->xnum; i++) memcpy(first_gpu_line + i, &gc, sizeof(GPUCell));
memset_array(first_gpu_line, gc, self->xnum);
const size_t cpu_stride = sizeof(CPUCell) * self->xnum;
memset(lineptr(cpu, start), 0, cpu_stride);
const size_t gpu_stride = sizeof(GPUCell) * self->xnum;

View file

@ -521,11 +521,10 @@ cursor_from(Line* self, PyObject *args) {
/*
 * Overwrite up to `num` cells of the line, starting at column `at`, with the
 * single character `ch`.
 *
 * Each affected CPU cell is replaced by a cell whose only non-zero field is
 * `ch` (so cc_idx and hyperlink_id are cleared), and the matching GPU cell's
 * attrs.width is set to 1 when `ch` is non-zero and to 0 otherwise. The range
 * is clipped to the line width (self->xnum); nothing is written when `at`
 * lies at or beyond the end of the line.
 */
void
line_clear_text(Line *self, unsigned int at, unsigned int num, char_type ch) {
    const uint16_t width = ch ? 1 : 0;
    // Clip by subtraction rather than by testing `at + num`: that sum can wrap
    // around for large arguments, which would skip the clip and let the fill
    // below run past the end of the line.
    if (at >= self->xnum) return;
    if (num > self->xnum - at) num = self->xnum - at;
    // All fields other than ch are zero-initialised by the designated initialiser.
    const CPUCell cc = {.ch=ch};
    memset_array(self->cpu_cells + at, cc, num);
    for (index_type i = at; i < at + num; i++) self->gpu_cells[i].attrs.width = width;
}
static PyObject*
@ -545,10 +544,9 @@ line_apply_cursor(Line *self, const Cursor *cursor, unsigned int at, unsigned in
#if BLANK_CHAR != 0
#error This implementation is incorrect for BLANK_CHAR != 0
#endif
for (index_type i = at; i < self->xnum && i < at + num; i++) {
memset(self->cpu_cells + i, 0, sizeof(self->cpu_cells[0]));
memcpy(self->gpu_cells + i, &gc, sizeof(gc));
}
if (at + num > self->xnum) { num = at < self->xnum ? self->xnum - at : 0; }
memset(self->cpu_cells + at, 0, num * sizeof(CPUCell));
memset_array(self->gpu_cells + at, gc, num);
} else {
for (index_type i = at; i < self->xnum && i < at + num; i++) {
gc.attrs.width = self->gpu_cells[i].attrs.width;