mirror of
https://github.com/kovidgoyal/kitty.git
synced 2026-06-24 18:07:01 +00:00
Make print_register useable without full debug
This commit is contained in:
parent
fa3579656b
commit
79f99bb3ad
1 changed file with 19 additions and 16 deletions
|
|
@ -167,7 +167,6 @@ static inline integer_t shuffle_impl256(const integer_t value, const integer_t s
|
|||
#define sum_bytes(x) (sum_bytes_128(simde_mm256_extracti128_si256(x, 0)) + sum_bytes_128(simde_mm256_extracti128_si256(x, 1)))
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#define print_register_as_bytes(r) { \
|
||||
printf("%s:\n", #r); \
|
||||
alignas(64) uint8_t data[sizeof(r)]; \
|
||||
|
|
@ -178,9 +177,12 @@ static inline integer_t shuffle_impl256(const integer_t value, const integer_t s
|
|||
} \
|
||||
printf("\n"); \
|
||||
}
|
||||
|
||||
#if 0
|
||||
#define debug_register print_register_as_bytes
|
||||
#define debug printf
|
||||
#else
|
||||
#define print_register_as_bytes(r)
|
||||
#define debug_register(...)
|
||||
#define debug(...)
|
||||
#endif
|
||||
|
||||
|
|
@ -379,7 +381,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
if (src_sz < sizeof(integer_t)) vec = zero_last_n_bytes(vec, sizeof(integer_t) - src_sz);
|
||||
|
||||
// Check if we have pure ASCII and use fast path
|
||||
print_register_as_bytes(vec);
|
||||
debug_register(vec);
|
||||
int32_t ascii_mask = movemask_epi8(vec);
|
||||
if (!ascii_mask) { // no bytes with high bit (0x80) set, so just plain ASCII
|
||||
FUNC(output_plain_ascii)(d, vec, src_sz);
|
||||
|
|
@ -398,11 +400,11 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
const integer_t bytes_indicating_start_of_four_byte_sequence = cmplt_epi8(set1_epi8(0xf0 - 1 - 0x80), vec_signed);
|
||||
state = blendv_epi8(state, set1_epi8(0xf4), bytes_indicating_start_of_four_byte_sequence);
|
||||
// state now has 0xc2 on all bytes that start a 2 byte sequence, 0xe3 on start of 3-byte sequence, 0xf4 on 4-byte start and 0x80 on rest
|
||||
print_register_as_bytes(state);
|
||||
debug_register(state);
|
||||
integer_t mask = and_si(state, set1_epi8(0xf8)); // keep upper 5 bits of state
|
||||
print_register_as_bytes(mask);
|
||||
debug_register(mask);
|
||||
integer_t count = and_si(state, set1_epi8(0x7)); // keep lower 3 bits of state
|
||||
print_register_as_bytes(count);
|
||||
debug_register(count);
|
||||
const integer_t zero = create_zero_integer(), one = set1_epi8(1), two = set1_epi8(2), three = set1_epi8(3);
|
||||
// count contains the number of bytes in the sequence for the start byte of every sequence and zero elsewhere
|
||||
// shift 02 bytes by 1 and subtract 1
|
||||
|
|
@ -411,7 +413,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
// shift 03 and 04 bytes by 2 and subtract 2
|
||||
counts = add_epi8(counts, shift_right_by_two_bytes(subtract_saturate_epu8(counts, two)));
|
||||
// counts now contains the number of bytes remaining in each utf-8 sequence of 2 or more bytes
|
||||
print_register_as_bytes(counts);
|
||||
debug_register(counts);
|
||||
// Only ASCII chars should have corresponding byte of counts == 0
|
||||
if (ascii_mask ^ movemask_epi8(cmpgt_epi8(counts, zero))) goto invalid_utf8;
|
||||
// The difference between a byte in counts and the next one should be negative,
|
||||
|
|
@ -421,14 +423,14 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
// Process the bytes storing the three resulting bytes that make up the unicode codepoint
|
||||
// mask all control bits so that we have only useful bits left
|
||||
vec = andnot_si(mask, vec);
|
||||
print_register_as_bytes(vec);
|
||||
debug_register(vec);
|
||||
|
||||
// Now calculate the three output vectors
|
||||
|
||||
// The lowest byte is made up of 6 bits from locations with counts == 1 and the lowest two bits from locations with count == 2
|
||||
// In addition, the ASCII bytes are copied unchanged from vec
|
||||
integer_t vec_non_ascii = andnot_si(cmpeq_epi8(counts, zero), vec);
|
||||
print_register_as_bytes(vec_non_ascii);
|
||||
debug_register(vec_non_ascii);
|
||||
integer_t vec_right1 = shift_right_by_one_byte(vec_non_ascii);
|
||||
integer_t output1 = blendv_epi8(vec,
|
||||
or_si(
|
||||
|
|
@ -436,7 +438,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
),
|
||||
cmpeq_epi8(counts, one)
|
||||
);
|
||||
print_register_as_bytes(output1);
|
||||
debug_register(output1);
|
||||
|
||||
// The next byte is made up of 4 bits (5, 4, 3, 2) from locations with count == 2 and the first 4 bits from locations with count == 3
|
||||
integer_t count2_locations = cmpeq_epi8(counts, two);
|
||||
|
|
@ -445,7 +447,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
output2 = or_si(output2, and_si(shift_left_by_bits16(vec_right1, 4), set1_epi8(0xf0))); // move 4 bits left and mask lower four bits and OR
|
||||
output2 = and_si(output2, count2_locations); // keep only the count2 bytes
|
||||
output2 = shift_right_by_one_byte(output2);
|
||||
print_register_as_bytes(output2);
|
||||
debug_register(output2);
|
||||
|
||||
// The last byte is made up of bits 5 and 6 from count == 3 and 3 bits from count == 4
|
||||
integer_t count3_locations = cmpeq_epi8(counts, three);
|
||||
|
|
@ -454,7 +456,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
output3 = or_si(output3, and_si(set1_epi8(0xfc), shift_left_by_bits16(vec_right1, 2)));
|
||||
output3 = and_si(output3, count3_locations); // keep only count3 bytes
|
||||
output3 = shift_right_by_two_bytes(output3);
|
||||
print_register_as_bytes(output3);
|
||||
debug_register(output3);
|
||||
|
||||
// Shuffle bytes to remove continuation bytes
|
||||
integer_t shifts = count_subs1; // number of bytes we need to skip for each UTF-8 sequence
|
||||
|
|
@ -485,14 +487,14 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
#undef move
|
||||
// convert the shifts into a suitable mask for shuffle by adding the byte number to each byte
|
||||
shifts = add_epi8(shifts, numbered_bytes());
|
||||
print_register_as_bytes(shifts);
|
||||
debug_register(shifts);
|
||||
|
||||
output1 = shuffle_epi8(output1, shifts);
|
||||
output2 = shuffle_epi8(output2, shifts);
|
||||
output3 = shuffle_epi8(output3, shifts);
|
||||
print_register_as_bytes(output1);
|
||||
print_register_as_bytes(output2);
|
||||
print_register_as_bytes(output3);
|
||||
debug_register(output1);
|
||||
debug_register(output2);
|
||||
debug_register(output3);
|
||||
|
||||
const unsigned num_of_discarded_bytes = sum_bytes(count_subs1);
|
||||
const unsigned num_codepoints = src_sz - num_of_discarded_bytes;
|
||||
|
|
@ -548,6 +550,7 @@ invalid_utf8:
|
|||
#undef zero_last_n_bytes
|
||||
#undef sum_bytes
|
||||
#undef is_zero
|
||||
#undef print_register_as_bytes
|
||||
#ifndef SIMD_STRING_IMPL_INCLUDED_ONCE
|
||||
#define SIMD_STRING_IMPL_INCLUDED_ONCE
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue