mirror of
https://github.com/kovidgoyal/kitty.git
synced 2026-05-13 16:37:27 +00:00
Implement grapheme segmentation in the terminal layer
This commit is contained in:
parent
167e6e73f7
commit
f5e8de2e4f
2 changed files with 84 additions and 86 deletions
162
kitty/screen.c
162
kitty/screen.c
|
|
@ -682,6 +682,10 @@ typedef struct text_loop_state {
|
|||
bool image_placeholder_marked;
|
||||
const CPUCell cc; const GPUCell g;
|
||||
CPUCell *cp; GPUCell *gp;
|
||||
GraphemeSegmentationResult seg;
|
||||
struct {
|
||||
index_type x, y; CPUCell *cc;
|
||||
} prev;
|
||||
} text_loop_state;
|
||||
|
||||
static void
|
||||
|
|
@ -712,12 +716,37 @@ clear_intersecting_selections(Screen *self, index_type y) {
|
|||
if (selection_has_screen_line(&self->url_ranges, y)) clear_selection(&self->url_ranges);
|
||||
}
|
||||
|
||||
static void
|
||||
init_prev_cell(Screen *self, text_loop_state *s) {
|
||||
zero_at_ptr(&s->prev);
|
||||
if (self->cursor->x) {
|
||||
s->prev.y = self->cursor->y;
|
||||
s->prev.x = self->cursor->x - 1;
|
||||
s->prev.cc = linebuf_cpu_cell_at(self->linebuf, s->prev.x, s->prev.y);
|
||||
} else if (self->cursor->y) {
|
||||
s->prev.y = self->cursor->y - 1;
|
||||
s->prev.x = self->columns - 1;
|
||||
s->prev.cc = linebuf_cpu_cell_at(self->linebuf, s->prev.x, s->prev.y);
|
||||
if (!s->prev.cc->next_char_was_wrapped) s->prev.cc = NULL;
|
||||
}
|
||||
}
|
||||
static void
|
||||
init_segmentation_state(Screen *self, text_loop_state *s) {
|
||||
init_prev_cell(self, s);
|
||||
grapheme_segmentation_reset(&s->seg);
|
||||
if (s->prev.cc) {
|
||||
text_in_cell(s->prev.cc, self->text_cache, self->lc);
|
||||
for (index_type i = 0; i < self->lc->count; i++) s->seg = grapheme_segmentation_step(s->seg, char_props_for(self->lc->chars[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
init_text_loop_line(Screen *self, text_loop_state *s) {
|
||||
linebuf_init_cells(self->linebuf, self->cursor->y, &s->cp, &s->gp);
|
||||
clear_intersecting_selections(self, self->cursor->y);
|
||||
linebuf_mark_line_dirty(self->linebuf, self->cursor->y);
|
||||
s->image_placeholder_marked = false;
|
||||
init_segmentation_state(self, s);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -857,10 +886,6 @@ set_active_hyperlink(Screen *self, char *id, char *url) {
|
|||
}
|
||||
}
|
||||
|
||||
static bool is_flag_pair(char_type a, char_type b) {
|
||||
return is_flag_codepoint(a) && is_flag_codepoint(b);
|
||||
}
|
||||
|
||||
static bool
|
||||
add_combining_char(Screen *self, char_type ch, index_type x, index_type y) {
|
||||
CPUCell *cpu_cells = linebuf_cpu_cells_for_line(self->linebuf, y);
|
||||
|
|
@ -886,25 +911,6 @@ add_combining_char(Screen *self, char_type ch, index_type x, index_type y) {
|
|||
}
|
||||
|
||||
|
||||
static bool
|
||||
draw_second_flag_codepoint(Screen *self, char_type ch) {
|
||||
index_type xpos = 0, ypos = 0;
|
||||
if (self->cursor->x > 1) {
|
||||
ypos = self->cursor->y;
|
||||
xpos = self->cursor->x - 2;
|
||||
} else if (self->cursor->y > 0 && self->columns > 1) {
|
||||
ypos = self->cursor->y - 1;
|
||||
xpos = self->columns - 2;
|
||||
} else return false;
|
||||
|
||||
CPUCell *cp = linebuf_cpu_cells_for_line(self->linebuf, ypos);
|
||||
CPUCell *cell = cp + xpos;
|
||||
text_in_cell(cell, self->text_cache, self->lc);
|
||||
if (self->lc->count != 1 || !is_flag_pair(self->lc->chars[0], ch)) return false;
|
||||
add_combining_char(self, ch, xpos, ypos);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
has_multiline_cells_in_span(const CPUCell *cells, const index_type start, const index_type count) {
|
||||
for (index_type x = start; x < start + count; x++) if (cells[x].y) return true;
|
||||
|
|
@ -959,22 +965,11 @@ is_emoji_presentation_base(char_type ch) {
|
|||
|
||||
static void
|
||||
draw_combining_char(Screen *self, text_loop_state *s, char_type ch) {
|
||||
bool has_prev_char = false;
|
||||
index_type xpos = 0, ypos = 0;
|
||||
if (self->cursor->x > 0) {
|
||||
ypos = self->cursor->y;
|
||||
xpos = self->cursor->x - 1;
|
||||
has_prev_char = true;
|
||||
} else if (self->cursor->y > 0) {
|
||||
ypos = self->cursor->y - 1;
|
||||
xpos = self->columns - 1;
|
||||
has_prev_char = true;
|
||||
}
|
||||
if (!has_prev_char) return;
|
||||
CPUCell *cp; GPUCell *gp;
|
||||
linebuf_init_cells(self->linebuf, ypos, &cp, &gp);
|
||||
linebuf_init_cells(self->linebuf, s->prev.y, &cp, &gp);
|
||||
index_type xpos = s->prev.x;
|
||||
while (xpos && cp[xpos].is_multicell && cp[xpos].x) xpos--;
|
||||
if (!add_combining_char(self, ch, xpos, ypos) || self->lc->count < 2) return;
|
||||
if (!add_combining_char(self, ch, xpos, s->prev.y) || self->lc->count < 2) return;
|
||||
unsigned base_pos = self->lc->count - 2;
|
||||
if (ch == VS16) { // emoji presentation variation marker makes default text presentation emoji (narrow emoji) into wide emoji
|
||||
CPUCell *cpu_cell = cp + xpos;
|
||||
|
|
@ -988,21 +983,28 @@ draw_combining_char(Screen *self, text_loop_state *s, char_type ch) {
|
|||
CPUCell *second = cp + xpos + 1;
|
||||
if (second->is_multicell) {
|
||||
if (second->y) {
|
||||
move_widened_char_past_multiline_chars(self, cpu_cell, gpu_cell, xpos, ypos);
|
||||
move_widened_char_past_multiline_chars(self, cpu_cell, gpu_cell, xpos, s->prev.y);
|
||||
init_segmentation_state(self, s);
|
||||
return;
|
||||
}
|
||||
nuke_multicell_char_at(self, xpos + 1, ypos, false);
|
||||
nuke_multicell_char_at(self, xpos + 1, s->prev.y, false);
|
||||
}
|
||||
zero_cells(s, second, gp + xpos + 1);
|
||||
self->cursor->x++;
|
||||
*second = *cpu_cell; second->x = 1;
|
||||
} else move_widened_char_past_multiline_chars(self, cpu_cell, gpu_cell, xpos, ypos);
|
||||
} else {
|
||||
move_widened_char_past_multiline_chars(self, cpu_cell, gpu_cell, xpos, s->prev.y);
|
||||
init_segmentation_state(self, s);
|
||||
}
|
||||
}
|
||||
} else if (ch == VS15) {
|
||||
const CPUCell *cpu_cell = cp + xpos;
|
||||
if (self->lc->chars[base_pos + 1] == VS15 && cpu_cell->is_multicell && cpu_cell->width == 2 && is_emoji_presentation_base(self->lc->chars[base_pos])) {
|
||||
index_type deltax = (cpu_cell->scale * cpu_cell->width) / 2;
|
||||
if (halve_multicell_width(self, xpos, ypos)) self->cursor->x -= deltax;
|
||||
if (halve_multicell_width(self, xpos, s->prev.y)) {
|
||||
self->cursor->x -= deltax;
|
||||
init_segmentation_state(self, s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1064,7 +1066,9 @@ draw_control_char(Screen *self, text_loop_state *s, uint32_t ch) {
|
|||
case BEL:
|
||||
screen_bell(self); break;
|
||||
case BS:
|
||||
screen_backspace(self); break;
|
||||
screen_backspace(self);
|
||||
init_segmentation_state(self, s);
|
||||
break;
|
||||
case HT:
|
||||
if (UNLIKELY(self->cursor->x >= self->columns)) {
|
||||
if (self->modes.mDECAWM) {
|
||||
|
|
@ -1080,6 +1084,7 @@ draw_control_char(Screen *self, text_loop_state *s, uint32_t ch) {
|
|||
screen_tab(self);
|
||||
}
|
||||
} else screen_tab(self);
|
||||
init_segmentation_state(self, s);
|
||||
break;
|
||||
case SI:
|
||||
screen_change_charset(self, 0); break;
|
||||
|
|
@ -1090,7 +1095,7 @@ draw_control_char(Screen *self, text_loop_state *s, uint32_t ch) {
|
|||
case FF:
|
||||
screen_linefeed(self); init_text_loop_line(self, s); break;
|
||||
case CR:
|
||||
screen_carriage_return(self); break;
|
||||
screen_carriage_return(self); init_segmentation_state(self, s); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1101,35 +1106,37 @@ draw_text_loop(Screen *self, const uint32_t *chars, size_t num_chars, text_loop_
|
|||
init_text_loop_line(self, s);
|
||||
for (size_t i = 0; i < num_chars; i++) {
|
||||
uint32_t ch = map_char(self, chars[i]);
|
||||
if (ch < ' ') {
|
||||
draw_control_char(self, s, ch);
|
||||
CharProps cp = char_props_for(ch);
|
||||
if (cp.is_invalid) {
|
||||
if (ch < ' ') draw_control_char(self, s, ch);
|
||||
continue;
|
||||
}
|
||||
if (self->cursor->x < self->columns && s->cp[self->cursor->x].is_multicell && !char_props_for(ch).is_combining_char) {
|
||||
s->seg = grapheme_segmentation_step(s->seg, cp);
|
||||
if (UNLIKELY(s->seg.add_to_current_cell && s->prev.cc)) {
|
||||
draw_combining_char(self, s, ch);
|
||||
continue;
|
||||
}
|
||||
int char_width = wcwidth_std(cp);
|
||||
if (UNLIKELY(char_width < 1)) {
|
||||
if (char_width == 0) {
|
||||
// check for some zero width chars that we want to preserve for
|
||||
// round tripping that are not added to prev cell by grapheme
|
||||
// segmentation.
|
||||
if (s->prev.cc && (ch == 0xad || ch == 0x200b || ch == 0x2060)) { // soft hyphen, zero width space, word joiner
|
||||
draw_combining_char(self, s, ch);
|
||||
}
|
||||
continue; // we cannot represent zero width chars except as combining chars
|
||||
}
|
||||
char_width = 1;
|
||||
}
|
||||
|
||||
if (self->cursor->x < self->columns && s->cp[self->cursor->x].is_multicell) {
|
||||
if (s->cp[self->cursor->x].y) {
|
||||
move_cursor_past_multicell(self, 1);
|
||||
init_text_loop_line(self, s);
|
||||
} else nuke_multicell_char_at(self, self->cursor->x, self->cursor->y, s->cp[self->cursor->x].x != 0);
|
||||
}
|
||||
|
||||
int char_width = 1;
|
||||
if (ch > DEL) { // not printable ASCII
|
||||
CharProps cp = char_props_for(ch);
|
||||
if (cp.is_invalid) continue;
|
||||
if (UNLIKELY(cp.is_combining_char)) {
|
||||
if (UNLIKELY(is_flag_codepoint(ch))) {
|
||||
if (draw_second_flag_codepoint(self, ch)) continue;
|
||||
} else {
|
||||
draw_combining_char(self, s, ch);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
char_width = wcwidth_std(cp);
|
||||
if (UNLIKELY(char_width < 1)) {
|
||||
if (char_width == 0) continue;
|
||||
char_width = 1;
|
||||
}
|
||||
}
|
||||
self->last_graphic_char = ch;
|
||||
if (UNLIKELY(self->columns < self->cursor->x + (unsigned int)char_width)) {
|
||||
if (self->modes.mDECAWM) {
|
||||
|
|
@ -1161,10 +1168,13 @@ draw_text_loop(Screen *self, const uint32_t *chars, size_t num_chars, text_loop_
|
|||
*fc = (CPUCell){.ch_or_idx=ch, .is_multicell=true, .width=2, .scale=1, .natural_width=true};
|
||||
*second = *fc; second->x = 1;
|
||||
s->gp[self->cursor->x + 1] = s->gp[self->cursor->x];
|
||||
s->prev.y = self->cursor->y; s->prev.x = self->cursor->x; s->prev.cc = fc;
|
||||
self->cursor->x += 2;
|
||||
} else {
|
||||
zero_cells(s, fc, s->gp + self->cursor->x);
|
||||
cell_set_char(fc, ch); self->cursor->x++;
|
||||
cell_set_char(fc, ch);
|
||||
s->prev.y = self->cursor->y; s->prev.x = self->cursor->x; s->prev.cc = fc;
|
||||
self->cursor->x++;
|
||||
fc->is_multicell = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -1276,7 +1286,6 @@ handle_variable_width_multicell_command(Screen *self, CPUCell mcd, ListOfChars *
|
|||
mcd.width = wcswidth_string(lc->chars);
|
||||
if (!mcd.width) { lc->count = 0; return; }
|
||||
handle_fixed_width_multicell_command(self, mcd, lc);
|
||||
lc->count = 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1297,29 +1306,16 @@ screen_handle_multicell_command(Screen *self, const MultiCellCommand *cmd, const
|
|||
if (mcd.width) handle_fixed_width_multicell_command(self, mcd, self->lc);
|
||||
else {
|
||||
RAII_ListOfChars(lc);
|
||||
GraphemeSegmentationResult s; grapheme_segmentation_reset(&s);
|
||||
mcd.natural_width = true;
|
||||
for (unsigned i = 0; i < self->lc->count; i++) {
|
||||
char_type ch = self->lc->chars[i];
|
||||
CharProps cp = char_props_for(ch);
|
||||
if (cp.is_invalid) continue;
|
||||
if (cp.is_combining_char) {
|
||||
if (is_flag_codepoint(ch)) {
|
||||
if (lc.count == 1) {
|
||||
if (is_flag_pair(lc.chars[0], ch)) {
|
||||
lc.chars[lc.count++] = ch; handle_variable_width_multicell_command(self, mcd, &lc);
|
||||
} else {
|
||||
handle_variable_width_multicell_command(self, mcd, &lc); lc.chars[lc.count++] = ch;
|
||||
}
|
||||
} else {
|
||||
handle_variable_width_multicell_command(self, mcd, &lc); lc.chars[lc.count++] = ch;
|
||||
}
|
||||
} else {
|
||||
if (!lc.count) continue;
|
||||
lc.chars[lc.count++] = ch;
|
||||
}
|
||||
} else {
|
||||
if ((s = grapheme_segmentation_step(s, cp)).add_to_current_cell) lc.chars[lc.count++] = ch;
|
||||
else {
|
||||
if (lc.count) handle_variable_width_multicell_command(self, mcd, &lc);
|
||||
lc.chars[lc.count++] = ch;
|
||||
lc.chars[0] = ch; lc.count = 1;
|
||||
}
|
||||
}
|
||||
if (lc.count) handle_variable_width_multicell_command(self, mcd, &lc);
|
||||
|
|
|
|||
|
|
@ -84,8 +84,10 @@ class TestScreen(BaseTest):
|
|||
|
||||
# Now test in insert mode
|
||||
s.reset(), s.reset_dirty()
|
||||
text = '1\u03062345'
|
||||
s.set_mode(IRM)
|
||||
s.draw('1\u03062345' * 5)
|
||||
s.draw(text * 5)
|
||||
self.ae(str(s.line(0)), text)
|
||||
s.cursor_back(5)
|
||||
self.ae(s.cursor.x, 0), self.ae(s.cursor.y, 4)
|
||||
s.reset_dirty()
|
||||
|
|
@ -127,9 +129,9 @@ class TestScreen(BaseTest):
|
|||
q = '\U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466'
|
||||
s.draw(q)
|
||||
self.ae(q, str(s.line(0)))
|
||||
self.ae(s.cursor.x, 8)
|
||||
self.ae(s.cursor.x, 2)
|
||||
for x in '\u200b\u200c\u200d':
|
||||
s = self.create_screen()
|
||||
s.reset()
|
||||
q = f'X{x}Y'
|
||||
s.draw(q)
|
||||
self.ae(q, str(s.line(0)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue