From f4bf9cf1c97eec8a5de461640b32d2d1500017be Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 07:39:02 +0000 Subject: [PATCH] Add not_index_byte and not_index_byte2 functions to simdstring package Fixes #9646 --- tools/simdstring/benchmarks_test.go | 42 ++++++++++++++++ tools/simdstring/generate.go | 74 +++++++++++++++++++++++++++++ tools/simdstring/intrinsics.go | 20 ++++++++ tools/simdstring/intrinsics_test.go | 59 +++++++++++++++++++++++ tools/simdstring/scalar.go | 36 ++++++++++++++ 5 files changed, 231 insertions(+) diff --git a/tools/simdstring/benchmarks_test.go b/tools/simdstring/benchmarks_test.go index 37587962b..590074102 100644 --- a/tools/simdstring/benchmarks_test.go +++ b/tools/simdstring/benchmarks_test.go @@ -64,3 +64,45 @@ func BenchmarkIndexByte2(b *testing.B) { t(pos, "scalar") } } + +func BenchmarkNotIndexByte(b *testing.B) { + t := func(pos int, which string) { + // Fill with 'a' and place 'q' (a non-matching byte) at the target position + data := haystack('a', 'q', pos) + f := NotIndexByte + switch which { + case "scalar": + f = not_index_byte_scalar + } + b.Run(fmt.Sprintf("%s_sz=%d", which, pos), func(b *testing.B) { + for b.Loop() { + f(data, 'a') + } + }) + } + for _, pos := range sizes { + t(pos, "simdstring") + t(pos, "scalar") + } +} + +func BenchmarkNotIndexByte2(b *testing.B) { + t := func(pos int, which string) { + // Fill with 'a' and place 'q' (neither 'a' nor 'x') at the target position + data := haystack('a', 'q', pos) + f := NotIndexByte2 + switch which { + case "scalar": + f = not_index_byte2_scalar + } + b.Run(fmt.Sprintf("%s_sz=%d", which, pos), func(b *testing.B) { + for b.Loop() { + f(data, 'a', 'x') + } + }) + } + for _, pos := range sizes { + t(pos, "simdstring") + t(pos, "scalar") + } +} diff --git a/tools/simdstring/generate.go b/tools/simdstring/generate.go index c0830d01c..5e7f5225f 100644 --- a/tools/simdstring/generate.go +++ 
b/tools/simdstring/generate.go @@ -404,6 +404,12 @@ func encode_cmgt16b(a, b, dest Register) (ans uint32) { return 0x271<<21 | b.ARMId()<<16 | 0xd<<10 | a.ARMId()<<5 | dest.ARMId() } +func encode_not16b(src, dest Register) uint32 { + // NOT Vd.16B, Vn.16B (MVN is an alias of NOT) + // Encoding: 0 Q 1 01110 size 10000 00101 10 Rn Rd (Q=1, size=00 for .16B) + return 0x6E205800 | (src.ARMId() << 5) | dest.ARMId() +} + func (f *Function) MaskForCountDestructive(vec, ans Register) { // vec is clobbered by this function f.Comment("Count the number of bytes to the first 0xff byte and put the result in", ans) @@ -688,6 +694,24 @@ func (f *Function) Or(a, b, dest Register) { f.AddTrailingComment(dest, "=", a, "|", b, "(bitwise)") } +func (f *Function) NotSelf(r Register) { + if f.ISA.Goarch == ARM64 { + f.Comment("Go assembler doesn't support the VMVN instruction, below we have: NOT", r.ARMFullWidth()+",", r.ARMFullWidth()) + f.instr("WORD", fmt.Sprintf("$0x%x", encode_not16b(r, r))) + f.AddTrailingComment(r, "= ~", r, "(bitwise NOT)") + return + } + all_ones := f.Vec(r.Size) + defer f.ReleaseReg(all_ones) + f.AllOnesRegister(all_ones) + if r.Size == 128 { + f.instr("PXOR", all_ones, r) + } else { + f.instr("VPXOR", all_ones, r, r) + } + f.AddTrailingComment(r, "= ~", r, "(bitwise NOT)") +} + func (f *Function) And(a, b, dest Register) { if f.ISA.Goarch == ARM64 { f.instr("VAND", a.ARMFullWidth(), b.ARMFullWidth(), dest.ARMFullWidth()) @@ -1504,6 +1528,54 @@ func (s *State) indexc0() { } +func (s *State) not_index_byte_body(f *Function) { + b := f.Vec() + f.Set1Epi8("b", b) + test_bytes := func(bytes_to_test, test_ans Register) { + f.CmpEqEpi8(bytes_to_test, b, test_ans) + f.NotSelf(test_ans) + } + s.index_func(f, test_bytes) +} + +func (s *State) not_index_byte() { + f := s.NewFunction("not_index_byte_asm", "Find the index of the first byte that is not b", []FunctionParam{{"data", ByteSlice}, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}}) + if s.ISA.HasSIMD { + 
s.not_index_byte_body(f) + } + f = s.NewFunction("not_index_byte_string_asm", "Find the index of the first byte that is not b", []FunctionParam{{"data", types.String}, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}}) + if s.ISA.HasSIMD { + s.not_index_byte_body(f) + } + +} + +func (s *State) not_index_byte2_body(f *Function) { + b1 := f.Vec() + b2 := f.Vec() + f.Set1Epi8("b1", b1) + f.Set1Epi8("b2", b2) + test_bytes := func(bytes_to_test, test_ans Register) { + f.CmpEqEpi8(bytes_to_test, b1, test_ans) + f.CmpEqEpi8(bytes_to_test, b2, bytes_to_test) + f.Or(test_ans, bytes_to_test, test_ans) + f.NotSelf(test_ans) + } + s.index_func(f, test_bytes) +} + +func (s *State) not_index_byte2() { + f := s.NewFunction("not_index_byte2_asm", "Find the index of the first byte that is neither b1 nor b2", []FunctionParam{{"data", ByteSlice}, {"b1", types.Byte}, {"b2", types.Byte}}, []FunctionParam{{"ans", types.Int}}) + if s.ISA.HasSIMD { + s.not_index_byte2_body(f) + } + f = s.NewFunction("not_index_byte2_string_asm", "Find the index of the first byte that is neither b1 nor b2", []FunctionParam{{"data", types.String}, {"b1", types.Byte}, {"b2", types.Byte}}, []FunctionParam{{"ans", types.Int}}) + if s.ISA.HasSIMD { + s.not_index_byte2_body(f) + } + +} + func (s *State) Generate() { s.test_load() s.test_set1_epi8() @@ -1516,6 +1588,8 @@ func (s *State) Generate() { s.indexbyte2() s.indexc0() s.indexbyte() + s.not_index_byte() + s.not_index_byte2() s.OutputFunction() } diff --git a/tools/simdstring/intrinsics.go b/tools/simdstring/intrinsics.go index 413a9e3a3..71de1df8b 100644 --- a/tools/simdstring/intrinsics.go +++ b/tools/simdstring/intrinsics.go @@ -33,6 +33,18 @@ var IndexC0 func(data []byte) int = index_c0_scalar // Return the index at which the first C0 byte is found or -1 when no such bytes are present. var IndexC0String func(data string) int = index_c0_string_scalar +// Return the index of the first byte in data that is not equal to b. 
If all bytes equal b, -1 is returned. +var NotIndexByte func(data []byte, b byte) int = not_index_byte_scalar + +// Return the index of the first byte in text that is not equal to b. If all bytes equal b, -1 is returned. +var NotIndexByteString func(text string, b byte) int = not_index_byte_string_scalar + +// Return the index of the first byte in data that is neither a nor b. If all bytes are a or b, -1 is returned. +var NotIndexByte2 func(data []byte, a, b byte) int = not_index_byte2_scalar + +// Return the index of the first byte in text that is neither a nor b. If all bytes are a or b, -1 is returned. +var NotIndexByte2String func(text string, a, b byte) int = not_index_byte2_string_scalar + func init() { switch runtime.GOARCH { case "amd64": @@ -51,6 +63,10 @@ func init() { IndexByte2String = index_byte2_string_asm_256 IndexC0 = index_c0_asm_256 IndexC0String = index_c0_string_asm_256 + NotIndexByte = not_index_byte_asm_256 + NotIndexByteString = not_index_byte_string_asm_256 + NotIndexByte2 = not_index_byte2_asm_256 + NotIndexByte2String = not_index_byte2_string_asm_256 VectorSize = 32 } else if Have128bit { IndexByte = index_byte_asm_128 @@ -59,6 +75,10 @@ func init() { IndexByte2String = index_byte2_string_asm_128 IndexC0 = index_c0_asm_128 IndexC0String = index_c0_string_asm_128 + NotIndexByte = not_index_byte_asm_128 + NotIndexByteString = not_index_byte_string_asm_128 + NotIndexByte2 = not_index_byte2_asm_128 + NotIndexByte2String = not_index_byte2_string_asm_128 VectorSize = 16 } } diff --git a/tools/simdstring/intrinsics_test.go b/tools/simdstring/intrinsics_test.go index 23f29aec3..84e19f4e4 100644 --- a/tools/simdstring/intrinsics_test.go +++ b/tools/simdstring/intrinsics_test.go @@ -244,6 +244,65 @@ func TestSIMDStringOps(t *testing.T) { index_test([]byte("abc"), 'x') index_test([]byte("abc"), 'b') + not_index_test := func(haystack []byte, needle byte) { + var actual int + expected := not_index_byte_scalar(haystack, needle) + + for _, sz := range 
sizes { + switch sz { + case 16: + actual = not_index_byte_asm_128(haystack, needle) + case 32: + actual = not_index_byte_asm_256(haystack, needle) + } + if actual != expected { + t.Fatalf("not_index failed in: %#v (%d != %d) at size: %d with needle: %#v", string(haystack), expected, actual, sz, needle) + } + } + } + not_index_test(nil, 'a') + not_index_test([]byte{}, 'a') + not_index_test([]byte("aaa"), 'a') + not_index_test([]byte("aaab"), 'a') + not_index_test([]byte("baaa"), 'a') + not_index_test([]byte("abc"), 'a') + for _, sz := range []int{0, 16, 32, 64, 79} { + q := strings.Repeat("a", sz) + "b" + not_index_test([]byte(q), 'a') + not_index_test([]byte(q), 'b') + not_index_test([]byte(strings.Repeat("a", sz)), 'a') + } + + not_index2_test := func(haystack []byte, a, b byte) { + var actual int + expected := not_index_byte2_scalar(haystack, a, b) + + for _, sz := range sizes { + switch sz { + case 16: + actual = not_index_byte2_asm_128(haystack, a, b) + case 32: + actual = not_index_byte2_asm_256(haystack, a, b) + } + if actual != expected { + t.Fatalf("not_index2 failed in: %#v (%d != %d) at size: %d with needles: %#v %#v", string(haystack), expected, actual, sz, a, b) + } + } + } + not_index2_test(nil, 'a', 'b') + not_index2_test([]byte{}, 'a', 'b') + not_index2_test([]byte("aabb"), 'a', 'b') + not_index2_test([]byte("aabbc"), 'a', 'b') + not_index2_test([]byte("caabb"), 'a', 'b') + for _, sz := range []int{0, 16, 32, 64, 79} { + q := strings.Repeat("ab", sz) + "c" + not_index2_test([]byte(q), 'a', 'b') + not_index2_test([]byte(strings.Repeat("ab", sz)), 'a', 'b') + for align := range 32 { + not_index2_test([]byte(strings.Repeat(" ", align)+q), 'a', 'b') + } + } + } func TestIntrinsics(t *testing.T) { diff --git a/tools/simdstring/scalar.go b/tools/simdstring/scalar.go index 3044740ce..811bf7260 100644 --- a/tools/simdstring/scalar.go +++ b/tools/simdstring/scalar.go @@ -57,3 +57,39 @@ func index_c0_string_scalar(data string) int { } return -1 } + +func 
not_index_byte_scalar(data []byte, b byte) int { + for i, ch := range data { + if ch != b { + return i + } + } + return -1 +} + +func not_index_byte_string_scalar(data string, b byte) int { + for i := 0; i < len(data); i++ { + if data[i] != b { + return i + } + } + return -1 +} + +func not_index_byte2_scalar(data []byte, a, b byte) int { + for i, ch := range data { + if ch != a && ch != b { + return i + } + } + return -1 +} + +func not_index_byte2_string_scalar(data string, a, b byte) int { + for i := 0; i < len(data); i++ { + if data[i] != a && data[i] != b { + return i + } + } + return -1 +}