From d429f732e1c7a95e205830f56ab1d80098e568f3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 25 Mar 2025 13:45:56 +0530 Subject: [PATCH] DRYer --- gen/wcwidth.py | 14 +++++++++++-- kitty/char-props-data.h | 32 +++++++++++++++++++++++++++++ tools/wcswidth/char-props-data.go | 34 +++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/gen/wcwidth.py b/gen/wcwidth.py index 2cd550b61..2899a1306 100755 --- a/gen/wcwidth.py +++ b/gen/wcwidth.py @@ -12,6 +12,7 @@ from contextlib import contextmanager from functools import lru_cache, partial from html.entities import html5 from itertools import groupby +from math import ceil, log from operator import itemgetter from typing import ( Callable, @@ -598,15 +599,21 @@ def gen_multistage_table( width_shift = 4 + +def bitsize(maxval: int) -> int: # number of bits needed to store maxval + return ceil(log(maxval, 2)) + + class CharProps(NamedTuple): width: int = 3 - grapheme_break: str = '4' - indic_conjunct_break: str = '2' + grapheme_break: str = '' # set at runtime + indic_conjunct_break: str = '' # set at runtime is_extended_pictographic: bool = True is_emoji: bool = True is_emoji_presentation_base: bool = True + # derived properties for fast lookup is_invalid: bool = True is_non_rendered: bool = True is_symbol: bool = True @@ -693,6 +700,8 @@ def generate_enum(p: Callable[..., None], gp: Callable[..., None], name: str, *i def gen_char_props() -> None: + CharProps._field_defaults['grapheme_break'] = str(bitsize(len(grapheme_segmentation_maps) + 2)) + CharProps._field_defaults['indic_conjunct_break'] = str(bitsize(len(incb_map) + 1)) invalid = class_maps['Cc'] | class_maps['Cs'] non_printing = invalid | class_maps['Cf'] width_map: dict[int, int] = {} @@ -732,6 +741,7 @@ def gen_char_props() -> None: gp('package wcswidth') generate_enum(c, gp, 'GraphemeBreakProperty', 'AtStart', 'None', *grapheme_segmentation_maps, prefix='GBP_') generate_enum(c, gp, 'IndicConjunctBreak', 'None', *incb_map, prefix='ICB_') + generate_enum(c, gp, 'UnicodeCategory', *class_maps, prefix='UC_') bf = make_bitfield('tools/wcswidth', 'CharProps', *CharProps().go_fields, add_package=False)[1] gp(bf) gp(f''' diff --git a/kitty/char-props-data.h b/kitty/char-props-data.h index 6866f0905..1f5be9fdd 100644 --- a/kitty/char-props-data.h +++ b/kitty/char-props-data.h @@ -28,6 +28,38 @@ typedef enum IndicConjunctBreak { ICB_Extend, } IndicConjunctBreak; +typedef enum UnicodeCategory { + UC_Cc, + UC_Zs, + UC_Po, + UC_Sc, + UC_Ps, + UC_Pe, + UC_Sm, + UC_Pd, + UC_Nd, + UC_Lu, + UC_Sk, + UC_Pc, + UC_Ll, + UC_So, + UC_Lo, + UC_Pi, + UC_Cf, + UC_No, + UC_Pf, + UC_Lt, + UC_Lm, + UC_Mn, + UC_Me, + UC_Mc, + UC_Nl, + UC_Zl, + UC_Zp, + UC_Cs, + UC_Co, +} UnicodeCategory; + static const char_type CharProps_mask = 255u; static const char_type CharProps_shift = 8u; static const uint8_t CharProps_t1[4352] = { diff --git a/tools/wcswidth/char-props-data.go b/tools/wcswidth/char-props-data.go index 332644aa5..b2121b92e 100644 --- a/tools/wcswidth/char-props-data.go +++ b/tools/wcswidth/char-props-data.go @@ -29,6 +29,40 @@ const ( ICB_Extend ) +type UnicodeCategory uint8 + +const ( + UC_Cc UnicodeCategory = iota + UC_Zs + UC_Po + UC_Sc + UC_Ps + UC_Pe + UC_Sm + UC_Pd + UC_Nd + UC_Lu + UC_Sk + UC_Pc + UC_Ll + UC_So + UC_Lo + UC_Pi + UC_Cf + UC_No + UC_Pf + UC_Lt + UC_Lm + UC_Mn + UC_Me + UC_Mc + UC_Nl + UC_Zl + UC_Zp + UC_Cs + UC_Co +) + // Total number of bits used: 16 type CharProps uint16