Add not_index_byte and not_index_byte2 functions to simdstring package

Fixes #9646
This commit is contained in:
copilot-swe-agent[bot] 2026-03-12 07:39:02 +00:00 committed by Kovid Goyal
parent d8af7e2c88
commit f4bf9cf1c9
No known key found for this signature in database
GPG key ID: 06BC317B515ACE7C
5 changed files with 231 additions and 0 deletions

View file

@ -64,3 +64,45 @@ func BenchmarkIndexByte2(b *testing.B) {
t(pos, "scalar")
}
}
func BenchmarkNotIndexByte(b *testing.B) {
t := func(pos int, which string) {
// Fill with 'a' and place 'q' (a non-matching byte) at the target position
data := haystack('a', 'q', pos)
f := NotIndexByte
switch which {
case "scalar":
f = not_index_byte_scalar
}
b.Run(fmt.Sprintf("%s_sz=%d", which, pos), func(b *testing.B) {
for b.Loop() {
f(data, 'a')
}
})
}
for _, pos := range sizes {
t(pos, "simdstring")
t(pos, "scalar")
}
}
func BenchmarkNotIndexByte2(b *testing.B) {
t := func(pos int, which string) {
// Fill with 'a' and place 'q' (neither 'a' nor 'x') at the target position
data := haystack('a', 'q', pos)
f := NotIndexByte2
switch which {
case "scalar":
f = not_index_byte2_scalar
}
b.Run(fmt.Sprintf("%s_sz=%d", which, pos), func(b *testing.B) {
for b.Loop() {
f(data, 'a', 'x')
}
})
}
for _, pos := range sizes {
t(pos, "simdstring")
t(pos, "scalar")
}
}

View file

@ -404,6 +404,12 @@ func encode_cmgt16b(a, b, dest Register) (ans uint32) {
return 0x271<<21 | b.ARMId()<<16 | 0xd<<10 | a.ARMId()<<5 | dest.ARMId()
}
// encode_not16b returns the machine word for the ARM64 instruction
// NOT Vd.16B, Vn.16B (alias of MVN), reading from src and writing to dest.
func encode_not16b(src, dest Register) uint32 {
	// Encoding: 0 Q 1 01110 size 10000 00101 10 Rn Rd (Q=1, size=00 for .16B)
	const opcode = 0x6E205800
	rn := src.ARMId() << 5 // source register goes in the Rn field
	rd := dest.ARMId()     // destination register goes in the Rd field
	return opcode | rn | rd
}
func (f *Function) MaskForCountDestructive(vec, ans Register) {
// vec is clobbered by this function
f.Comment("Count the number of bytes to the first 0xff byte and put the result in", ans)
@ -688,6 +694,24 @@ func (f *Function) Or(a, b, dest Register) {
f.AddTrailingComment(dest, "=", a, "|", b, "(bitwise)")
}
// NotSelf emits instructions that replace the vector register r with its
// bitwise complement.
func (f *Function) NotSelf(r Register) {
	if f.ISA.Goarch != ARM64 {
		// Non-ARM64 path: XOR r with an all-ones register, which flips every bit.
		ones := f.Vec(r.Size)
		defer f.ReleaseReg(ones)
		f.AllOnesRegister(ones)
		switch r.Size {
		case 128:
			f.instr("PXOR", ones, r)
		default:
			f.instr("VPXOR", ones, r, r)
		}
		f.AddTrailingComment(r, "= ~", r, "(bitwise NOT)")
		return
	}
	// ARM64 path: the instruction is emitted as a raw WORD built by encode_not16b.
	f.Comment("Go assembler doesn't support the VMVN instruction, below we have: NOT", r.ARMFullWidth()+",", r.ARMFullWidth())
	raw_word := fmt.Sprintf("$0x%x", encode_not16b(r, r))
	f.instr("WORD", raw_word)
	f.AddTrailingComment(r, "= ~", r, "(bitwise NOT)")
}
func (f *Function) And(a, b, dest Register) {
if f.ISA.Goarch == ARM64 {
f.instr("VAND", a.ARMFullWidth(), b.ARMFullWidth(), dest.ARMFullWidth())
@ -1504,6 +1528,54 @@ func (s *State) indexc0() {
}
// not_index_byte_body emits the body shared by the not_index_byte functions.
// It loads the "b" parameter into a vector register with Set1Epi8 and passes
// index_func a per-chunk test that compares for equality with that register
// and then inverts the result, so bytes different from b are the ones flagged.
func (s *State) not_index_byte_body(f *Function) {
	needle := f.Vec()
	f.Set1Epi8("b", needle)
	s.index_func(f, func(chunk, result Register) {
		f.CmpEqEpi8(chunk, needle, result)
		f.NotSelf(result)
	})
}
// not_index_byte declares the byte-slice and string variants of the
// not_index_byte assembly function, filling in a body only when the target
// ISA has SIMD.
func (s *State) not_index_byte() {
	const doc = "Find the index of the first byte that is not b"
	variants := []struct {
		name string
		data FunctionParam
	}{
		{"not_index_byte_asm", FunctionParam{"data", ByteSlice}},
		{"not_index_byte_string_asm", FunctionParam{"data", types.String}},
	}
	for _, v := range variants {
		f := s.NewFunction(v.name, doc, []FunctionParam{v.data, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}})
		if !s.ISA.HasSIMD {
			continue
		}
		s.not_index_byte_body(f)
	}
}
// not_index_byte2_body emits the body shared by the not_index_byte2
// functions. It loads the "b1" and "b2" parameters into vector registers with
// Set1Epi8 and passes index_func a per-chunk test that ORs the two equality
// comparisons and then inverts the result, so bytes matching neither value
// are the ones flagged.
func (s *State) not_index_byte2_body(f *Function) {
	first := f.Vec()
	second := f.Vec()
	f.Set1Epi8("b1", first)
	f.Set1Epi8("b2", second)
	s.index_func(f, func(chunk, result Register) {
		f.CmpEqEpi8(chunk, first, result)
		// The second comparison overwrites chunk itself, avoiding an extra register.
		f.CmpEqEpi8(chunk, second, chunk)
		f.Or(result, chunk, result)
		f.NotSelf(result)
	})
}
// not_index_byte2 declares the byte-slice and string variants of the
// not_index_byte2 assembly function, filling in a body only when the target
// ISA has SIMD.
func (s *State) not_index_byte2() {
	const doc = "Find the index of the first byte that is neither b1 nor b2"
	variants := []struct {
		name string
		data FunctionParam
	}{
		{"not_index_byte2_asm", FunctionParam{"data", ByteSlice}},
		{"not_index_byte2_string_asm", FunctionParam{"data", types.String}},
	}
	for _, v := range variants {
		f := s.NewFunction(v.name, doc, []FunctionParam{v.data, {"b1", types.Byte}, {"b2", types.Byte}}, []FunctionParam{{"ans", types.Int}})
		if !s.ISA.HasSIMD {
			continue
		}
		s.not_index_byte2_body(f)
	}
}
func (s *State) Generate() {
s.test_load()
s.test_set1_epi8()
@ -1516,6 +1588,8 @@ func (s *State) Generate() {
s.indexbyte2()
s.indexc0()
s.indexbyte()
s.not_index_byte()
s.not_index_byte2()
s.OutputFunction()
}

View file

@ -33,6 +33,18 @@ var IndexC0 func(data []byte) int = index_c0_scalar
// Return the index at which the first C0 byte is found or -1 when no such bytes are present.
var IndexC0String func(data string) int = index_c0_string_scalar
// NotIndexByte returns the index of the first byte in data that is not equal to b.
// If all bytes equal b (including when data is empty), -1 is returned.
// It starts out as the scalar implementation; init() may replace it with an assembly one.
var NotIndexByte func(data []byte, b byte) int = not_index_byte_scalar
// NotIndexByteString returns the index of the first byte in text that is not equal to b.
// If all bytes equal b (including when text is empty), -1 is returned.
// It starts out as the scalar implementation; init() may replace it with an assembly one.
var NotIndexByteString func(text string, b byte) int = not_index_byte_string_scalar
// NotIndexByte2 returns the index of the first byte in data that is neither a nor b.
// If all bytes are a or b (including when data is empty), -1 is returned.
// It starts out as the scalar implementation; init() may replace it with an assembly one.
var NotIndexByte2 func(data []byte, a, b byte) int = not_index_byte2_scalar
// NotIndexByte2String returns the index of the first byte in text that is neither a nor b.
// If all bytes are a or b (including when text is empty), -1 is returned.
// It starts out as the scalar implementation; init() may replace it with an assembly one.
var NotIndexByte2String func(text string, a, b byte) int = not_index_byte2_string_scalar
func init() {
switch runtime.GOARCH {
case "amd64":
@ -51,6 +63,10 @@ func init() {
IndexByte2String = index_byte2_string_asm_256
IndexC0 = index_c0_asm_256
IndexC0String = index_c0_string_asm_256
NotIndexByte = not_index_byte_asm_256
NotIndexByteString = not_index_byte_string_asm_256
NotIndexByte2 = not_index_byte2_asm_256
NotIndexByte2String = not_index_byte2_string_asm_256
VectorSize = 32
} else if Have128bit {
IndexByte = index_byte_asm_128
@ -59,6 +75,10 @@ func init() {
IndexByte2String = index_byte2_string_asm_128
IndexC0 = index_c0_asm_128
IndexC0String = index_c0_string_asm_128
NotIndexByte = not_index_byte_asm_128
NotIndexByteString = not_index_byte_string_asm_128
NotIndexByte2 = not_index_byte2_asm_128
NotIndexByte2String = not_index_byte2_string_asm_128
VectorSize = 16
}
}

View file

@ -244,6 +244,65 @@ func TestSIMDStringOps(t *testing.T) {
index_test([]byte("abc"), 'x')
index_test([]byte("abc"), 'b')
not_index_test := func(haystack []byte, needle byte) {
var actual int
expected := not_index_byte_scalar(haystack, needle)
for _, sz := range sizes {
switch sz {
case 16:
actual = not_index_byte_asm_128(haystack, needle)
case 32:
actual = not_index_byte_asm_256(haystack, needle)
}
if actual != expected {
t.Fatalf("not_index failed in: %#v (%d != %d) at size: %d with needle: %#v", string(haystack), expected, actual, sz, needle)
}
}
}
not_index_test(nil, 'a')
not_index_test([]byte{}, 'a')
not_index_test([]byte("aaa"), 'a')
not_index_test([]byte("aaab"), 'a')
not_index_test([]byte("baaa"), 'a')
not_index_test([]byte("abc"), 'a')
for _, sz := range []int{0, 16, 32, 64, 79} {
q := strings.Repeat("a", sz) + "b"
not_index_test([]byte(q), 'a')
not_index_test([]byte(q), 'b')
not_index_test([]byte(strings.Repeat("a", sz)), 'a')
}
not_index2_test := func(haystack []byte, a, b byte) {
var actual int
expected := not_index_byte2_scalar(haystack, a, b)
for _, sz := range sizes {
switch sz {
case 16:
actual = not_index_byte2_asm_128(haystack, a, b)
case 32:
actual = not_index_byte2_asm_256(haystack, a, b)
}
if actual != expected {
t.Fatalf("not_index2 failed in: %#v (%d != %d) at size: %d with needles: %#v %#v", string(haystack), expected, actual, sz, a, b)
}
}
}
not_index2_test(nil, 'a', 'b')
not_index2_test([]byte{}, 'a', 'b')
not_index2_test([]byte("aabb"), 'a', 'b')
not_index2_test([]byte("aabbc"), 'a', 'b')
not_index2_test([]byte("caabb"), 'a', 'b')
for _, sz := range []int{0, 16, 32, 64, 79} {
q := strings.Repeat("ab", sz) + "c"
not_index2_test([]byte(q), 'a', 'b')
not_index2_test([]byte(strings.Repeat("ab", sz)), 'a', 'b')
for align := range 32 {
not_index2_test([]byte(strings.Repeat(" ", align)+q), 'a', 'b')
}
}
}
func TestIntrinsics(t *testing.T) {

View file

@ -57,3 +57,39 @@ func index_c0_string_scalar(data string) int {
}
return -1
}
// not_index_byte_scalar returns the index of the first byte in data that
// differs from b, or -1 when every byte equals b (including empty input).
func not_index_byte_scalar(data []byte, b byte) int {
	pos := 0
	// Skip over the leading run of bytes equal to b.
	for pos < len(data) && data[pos] == b {
		pos++
	}
	if pos == len(data) {
		return -1
	}
	return pos
}
// not_index_byte_string_scalar returns the index of the first byte in data
// that differs from b, or -1 when every byte equals b (including empty input).
func not_index_byte_string_scalar(data string, b byte) int {
	// Range over the bytes, not the runes, so indices are byte offsets.
	for i, ch := range []byte(data) {
		if ch == b {
			continue
		}
		return i
	}
	return -1
}
// not_index_byte2_scalar returns the index of the first byte in data that is
// neither a nor b, or -1 when every byte is one of the two (including empty
// input).
func not_index_byte2_scalar(data []byte, a, b byte) int {
	for idx := 0; idx < len(data); idx++ {
		switch data[idx] {
		case a, b:
			// Still inside the run of accepted bytes.
			continue
		}
		return idx
	}
	return -1
}
// not_index_byte2_string_scalar returns the index of the first byte in data
// that is neither a nor b, or -1 when every byte is one of the two (including
// empty input).
func not_index_byte2_string_scalar(data string, a, b byte) int {
	n := len(data)
	for i := 0; i < n; i++ {
		// Index the string directly so the scan is byte-wise, not rune-wise.
		if ch := data[i]; ch == a || ch == b {
			continue
		}
		return i
	}
	return -1
}