mmorton/matcher.go

## matcher.go
package resource

import (
	"errors"
	"regexp"
	"strings"
)

var matchStringRE = regexp.MustCompile(`(?i)^(\w+?|\*)((:(\w+|\*|\*\*))*?:(\w+|\*)|)$`)

func ValidateMatchString(has string) error {
	if !matchStringRE.MatchString(has) {
		return errors.New("invalid match string")
	}
	if strings.Count(has, "**") > 1 {
		return errors.New("invalid match string")
	}
	return nil
}

func NormalizeMatchString(has string) string {
	return strings.ToLower(has)
}

// can only can match terminal wildcards
func DoesMatchRequiredPrefix(has, req string) bool {
	if has[len(has)-1] == '*' {
		return strings.HasPrefix(req, has[:len(has)-1])
	} else {
		return has == req
	}
}

// only useful if there is a large number of prefix only (terminal wildcard) matches compared to complex ones
func DoesMatchRequiredMixed(has, req string) bool {
	if i := strings.Index(has, "*"); i > -1 && i == len(has)-1 {
		// we are ok for prefix match
		return DoesMatchRequiredPrefix(has, req)
	} else {
		// must do extended match
		return DoesMatchRequired(has, req)
	}
}

// foo:*:bar => /^foo:([^:]+):bar$/
// foo:**:bar => /^foo:(.+):bar$/
// foo:**:bar:* => /^foo:(.+):bar:([^:]+)$/
// compiles a match string into a regex
func CompileMatchString(has string) (*regexp.Regexp, error) {
	var ps []string
	for hi, hj, hl := 0, 0, len(has); hi < hl; hi++ {
		if has[hi] == '*' {
			if hi < hl-1 && has[hi+1] == '*' {
				// spread wc
				ps = append(ps, "(.+)")
				hi += 2
			} else if hi == hl-1 {
				// terminal wc
				ps = append(ps, "(.+)")
			} else {
				// inner wc
				ps = append(ps, "([^:]+)")
				hi += 1
			}
		} else {
			for hj = hi; hj < hl && has[hj] != ':'; hj++ {
			}
			ps = append(ps, has[hi:hj])
			hi = hj
		}
	}
	re, err := regexp.Compile("^" + strings.Join(ps, ":") + "$")
	if err != nil {
		return nil, err
	}
	return re, nil
}

// Allows matching on `has` strings like urn:foo:*:bar:*.
// This is complex, but is still performant though not quite as performant as a pure
// prefix search like `DoesMatchRequiredSimple`. It is far faster than compiling and using
// a regex, even when caching the resulting complication. It is also faster than prescanning
// for an inner wildcard and choosing between `DoesMatchRequired` and `DoesMatchRequiredSimple`.
func DoesMatchRequired(has, req string) bool {
	hl := len(has)
	rl := len(req)
	ri, hi := 0, 0

	for ; ri < rl && hi < hl; ri, hi = ri+1, hi+1 {
		if has[hi] == '*' {
			if hi == hl-1 {
				// wildcard at end matches rest
				return true
			} else if has[hi+1] == '*' {
				// spread wildcard, advance hi to next `:`, advance ri to same remaining segment number, more expensive branch
				// i.e. if hi has two remaining segments after the `:`, then ri must also have two remaining segments
				if hi+2 < hl && has[hi+2] != ':' { // sanity check
					return false
				}
				hi += 2 // hi on `:`
				// foo:**:bar
				//       ^ ==> scan forward count `:` as N
				// foo:one:two:bar
				//                ^ <== scan backward for `:` N times as long as > ri
				n := 0
				for hj := hi; hj < hl; hj++ {
					if has[hj] == ':' {
						n += 1
					}
				}
				rj := rl - 1
				for ; rj >= ri; rj-- { // >= is correct here?
					if req[rj] == ':' {
						n -= 1
						if n == 0 {
							break
						}
					}
				}
				if n > 0 || req[rj] != ':' { // sanity checks
					return false
				}
				ri = rj
				// both hi and ri are on `:`
			} else if has[hi+1] != ':' {
				// sanity check, if not at end, next character must be `:`
				return false
			} else {
				// advance hi to next `:`, advance ri to next `:`
				hi += 1 // we know next char is `:` due to previous sanity check
				for ri < rl {
					if req[ri] == ':' {
						break
					} else {
						ri += 1
					}
				}
			}
		} else if has[hi] != req[ri] {
			// current character must match
			return false
		}
	}

	if ri != rl || hi != hl {
		// if we did not consume both strings, not a match
		return false
	} else {
		return true
	}
}

## matcher_bench.md

      
    Raw
  

              matcher_bench.md
            
          
    cpu: AMD Ryzen 9 7950X3D 16-Core Processor
BenchmarkSimpleDoesMatchRequiredPrefix-32               422902569                2.840 ns/op
BenchmarkSimpleDoesMatchRequired-32                     189211654                6.332 ns/op
BenchmarkSimpleDoesMatchRequiredCompiled-32             30067350                39.45 ns/op
BenchmarkComplexDoesMatchRequired-32                    95626672                12.56 ns/op
BenchmarkComplexDoesMatchRequiredCompiled-32             6844918               177.7 ns/op
PASS
ok      github.com/cloverleaf/cloe/services/leaf/resource       6.055s


## matcher_test.go
package resource

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestDoesMatchRequiredSimple(t *testing.T) {
	td := []struct {
		name     string
		has      string
		req      string
		expected bool
	}{
		{"same specificity, no wildcards, equal", "foo:1", "foo:1", true},
		{"same specificity, no wildcards, not equal", "foo:1", "foo:2", false},
		{"more specificity, no wildcards, same prefix", "foo:1:bar:1", "foo:1", false},
		{"more specificity, has wildcard, same prefix", "foo:1:bar:*", "foo:1", false},
		{"same specificity, has wildcard", "foo:*", "foo:1", true},
		{"less specificity, has wildcard", "foo:*", "foo:1:bar:1", true},
	}

	for _, d := range td {
		t.Run(d.name, func(t *testing.T) {
			is := assert.New(t)
			is.Equalf(d.expected, DoesMatchRequiredPrefix(d.has, d.req), d.name)
		})
	}
}

func TestDoesMatchRequired(t *testing.T) {
	td := []struct {
		name     string
		has      string
		req      string
		expected bool
	}{
		{"same specificity, no wildcards, equal", "foo:1", "foo:1", true},
		{"same specificity, no wildcards, not equal", "foo:1", "foo:2", false},
		{"more specificity, no wildcards, same prefix", "foo:1:bar:1", "foo:1", false},
		{"more specificity, has wildcard, same prefix", "foo:1:bar:*", "foo:1", false},
		{"same specificity, has wildcard", "foo:*", "foo:1", true},
		{"less specificity, has wildcard", "foo:*", "foo:1:bar:1", true},
		{"same specificity, inner wildcard, same suffix", "foo:*:bar:1", "foo:2:bar:1", true},
		{"same specificity, inner wildcard, diff suffix", "foo:*:bar:1", "foo:2:bar:2", false},
		{"same specificity, two separate wildcard", "foo:*:bar:*", "foo:2:bar:2", true},
		{"same specificity, two separate wildcard, diff", "foo:*:car:*", "foo:2:bar:2", false},
		{"spread wildcard, same prefix, same suffix", "foo:**:bar", "foo:one:two:bar", true},
		{"spread wildcard, same prefix, diff suffix", "foo:**:car", "foo:one:two:bar", false},
		{"spread wildcard, same prefix, same suffix, wildcard suffix", "foo:**:bar:*", "foo:one:two:bar:1", true},
		{"spread wildcard, same prefix, diff suffix, wildcard suffix", "foo:**:car:*", "foo:one:two:bar:1", false},
	}

	for _, d := range td {
		t.Run(d.name, func(t *testing.T) {
			is := assert.New(t)
			is.Equalf(d.expected, DoesMatchRequired(d.has, d.req), d.name)
		})
	}
}

func BenchmarkSimpleDoesMatchRequiredPrefix(b *testing.B) {
	for b.Loop() {
		DoesMatchRequiredPrefix("foo:bar:*", "foo:bar:1")
	}
}

func BenchmarkSimpleDoesMatchRequired(b *testing.B) {
	for b.Loop() {
		DoesMatchRequired("foo:bar:*", "foo:bar:1")
	}
}

func BenchmarkSimpleDoesMatchRequiredCompiled(b *testing.B) {
	r, _ := CompileMatchString("foo:bar:*")
	for b.Loop() {
		_ = r.MatchString("foo:bar:1")
	}
}

func BenchmarkComplexDoesMatchRequired(b *testing.B) {
	for b.Loop() {
		DoesMatchRequired("foo:**:bar:*", "foo:a:b:bar:1")
	}
}

func BenchmarkComplexDoesMatchRequiredCompiled(b *testing.B) {
	r, _ := CompileMatchString("foo:**:bar:*")
	for b.Loop() {
		_ = r.MatchString("foo:a:b:bar:1")
	}
}

func TestValidateMatchString(t *testing.T) {
	td := []struct {
		name string
		has  string
		ok   bool
	}{
		{"exact", "foo:1", true},
		{"terminal wildcard", "foo:*", true},
		{"bad wildcard", "foo:*x", false},
		{"bad wildcard", "foo:x*", false},
		{"bad wildcard", "foo*", false},
		{"bad wildcard", "foo*:", false},
		{"nothing after colon", "foo:", false},
		{"deeper terminal wildcard", "foo:bar:*", true},
		{"internal wildcard", "foo:*:bar", true},
		{"internal & terminal wildcard", "foo:*:bar:*", true},
		{"ok spread wildcard", "foo:**:bar", true},
		{"multiple spread wildcard", "foo:**:bar:**:car", false},
		{"terminal spread wildcard", "foo:**", false},
	}

	for _, d := range td {
		t.Run(d.name, func(t *testing.T) {
			is := assert.New(t)
			err := ValidateMatchString(d.has)
			if d.ok {
				is.NoErrorf(err, d.name)
			} else {
				is.Errorf(err, d.name)
			}
		})
	}
}
	package resource

	import (
	"errors"
	"regexp"
	"strings"
	)

	var matchStringRE = regexp.MustCompile(`(?i)^(\w+?\|\)((:(\w+\|\\|\\))?:(\w+\|\)\|)$`)

	func ValidateMatchString(has string) error {
	if !matchStringRE.MatchString(has) {
	return errors.New("invalid match string")
	}
	if strings.Count(has, "**") > 1 {
	return errors.New("invalid match string")
	}
	return nil
	}

	func NormalizeMatchString(has string) string {
	return strings.ToLower(has)
	}

	// can only can match terminal wildcards
	func DoesMatchRequiredPrefix(has, req string) bool {
	if has[len(has)-1] == '*' {
	return strings.HasPrefix(req, has[:len(has)-1])
	} else {
	return has == req
	}
	}

	// only useful if there is a large number of prefix only (terminal wildcard) matches compared to complex ones
	func DoesMatchRequiredMixed(has, req string) bool {
	if i := strings.Index(has, "*"); i > -1 && i == len(has)-1 {
	// we are ok for prefix match
	return DoesMatchRequiredPrefix(has, req)
	} else {
	// must do extended match
	return DoesMatchRequired(has, req)
	}
	}

	// foo:*:bar => /^foo:([^:]+):bar$/
	// foo:**:bar => /^foo:(.+):bar$/
	// foo:*:bar: => /^foo:(.+):bar:([^:]+)$/
	// compiles a match string into a regex
	func CompileMatchString(has string) (*regexp.Regexp, error) {
	var ps []string
	for hi, hj, hl := 0, 0, len(has); hi < hl; hi++ {
	if has[hi] == '*' {
	if hi < hl-1 && has[hi+1] == '*' {
	// spread wc
	ps = append(ps, "(.+)")
	hi += 2
	} else if hi == hl-1 {
	// terminal wc
	ps = append(ps, "(.+)")
	} else {
	// inner wc
	ps = append(ps, "([^:]+)")
	hi += 1
	}
	} else {
	for hj = hi; hj < hl && has[hj] != ':'; hj++ {
	}
	ps = append(ps, has[hi:hj])
	hi = hj
	}
	}
	re, err := regexp.Compile("^" + strings.Join(ps, ":") + "$")
	if err != nil {
	return nil, err
	}
	return re, nil
	}

	// Allows matching on `has` strings like urn:foo::bar:.
	// This is complex, but is still performant though not quite as performant as a pure
	// prefix search like `DoesMatchRequiredSimple`. It is far faster than compiling and using
	// a regex, even when caching the resulting complication. It is also faster than prescanning
	// for an inner wildcard and choosing between `DoesMatchRequired` and `DoesMatchRequiredSimple`.
	func DoesMatchRequired(has, req string) bool {
	hl := len(has)
	rl := len(req)
	ri, hi := 0, 0

	for ; ri < rl && hi < hl; ri, hi = ri+1, hi+1 {
	if has[hi] == '*' {
	if hi == hl-1 {
	// wildcard at end matches rest
	return true
	} else if has[hi+1] == '*' {
	// spread wildcard, advance hi to next `:`, advance ri to same remaining segment number, more expensive branch
	// i.e. if hi has two remaining segments after the `:`, then ri must also have two remaining segments
	if hi+2 < hl && has[hi+2] != ':' { // sanity check
	return false
	}
	hi += 2 // hi on `:`
	// foo:**:bar
	// ^ ==> scan forward count `:` as N
	// foo:one:two:bar
	// ^ <== scan backward for `:` N times as long as > ri
	n := 0
	for hj := hi; hj < hl; hj++ {
	if has[hj] == ':' {
	n += 1
	}
	}
	rj := rl - 1
	for ; rj >= ri; rj-- { // >= is correct here?
	if req[rj] == ':' {
	n -= 1
	if n == 0 {
	break
	}
	}
	}
	if n > 0 \|\| req[rj] != ':' { // sanity checks
	return false
	}
	ri = rj
	// both hi and ri are on `:`
	} else if has[hi+1] != ':' {
	// sanity check, if not at end, next character must be `:`
	return false
	} else {
	// advance hi to next `:`, advance ri to next `:`
	hi += 1 // we know next char is `:` due to previous sanity check
	for ri < rl {
	if req[ri] == ':' {
	break
	} else {
	ri += 1
	}
	}
	}
	} else if has[hi] != req[ri] {
	// current character must match
	return false
	}
	}

	if ri != rl \|\| hi != hl {
	// if we did not consume both strings, not a match
	return false
	} else {
	return true
	}
	}
	package resource

	import (
	"testing"

	"github.com/stretchr/testify/assert"
	)

	func TestDoesMatchRequiredSimple(t *testing.T) {
	td := []struct {
	name string
	has string
	req string
	expected bool
	}{
	{"same specificity, no wildcards, equal", "foo:1", "foo:1", true},
	{"same specificity, no wildcards, not equal", "foo:1", "foo:2", false},
	{"more specificity, no wildcards, same prefix", "foo:1:bar:1", "foo:1", false},
	{"more specificity, has wildcard, same prefix", "foo:1:bar:*", "foo:1", false},
	{"same specificity, has wildcard", "foo:*", "foo:1", true},
	{"less specificity, has wildcard", "foo:*", "foo:1:bar:1", true},
	}

	for _, d := range td {
	t.Run(d.name, func(t *testing.T) {
	is := assert.New(t)
	is.Equalf(d.expected, DoesMatchRequiredPrefix(d.has, d.req), d.name)
	})
	}
	}

	func TestDoesMatchRequired(t *testing.T) {
	td := []struct {
	name string
	has string
	req string
	expected bool
	}{
	{"same specificity, no wildcards, equal", "foo:1", "foo:1", true},
	{"same specificity, no wildcards, not equal", "foo:1", "foo:2", false},
	{"more specificity, no wildcards, same prefix", "foo:1:bar:1", "foo:1", false},
	{"more specificity, has wildcard, same prefix", "foo:1:bar:*", "foo:1", false},
	{"same specificity, has wildcard", "foo:*", "foo:1", true},
	{"less specificity, has wildcard", "foo:*", "foo:1:bar:1", true},
	{"same specificity, inner wildcard, same suffix", "foo:*:bar:1", "foo:2:bar:1", true},
	{"same specificity, inner wildcard, diff suffix", "foo:*:bar:1", "foo:2:bar:2", false},
	{"same specificity, two separate wildcard", "foo::bar:", "foo:2:bar:2", true},
	{"same specificity, two separate wildcard, diff", "foo::car:", "foo:2:bar:2", false},
	{"spread wildcard, same prefix, same suffix", "foo:**:bar", "foo:one:two:bar", true},
	{"spread wildcard, same prefix, diff suffix", "foo:**:car", "foo:one:two:bar", false},
	{"spread wildcard, same prefix, same suffix, wildcard suffix", "foo:*:bar:", "foo:one:two:bar:1", true},
	{"spread wildcard, same prefix, diff suffix, wildcard suffix", "foo:*:car:", "foo:one:two:bar:1", false},
	}

	for _, d := range td {
	t.Run(d.name, func(t *testing.T) {
	is := assert.New(t)
	is.Equalf(d.expected, DoesMatchRequired(d.has, d.req), d.name)
	})
	}
	}

	func BenchmarkSimpleDoesMatchRequiredPrefix(b *testing.B) {
	for b.Loop() {
	DoesMatchRequiredPrefix("foo:bar:*", "foo:bar:1")
	}
	}

	func BenchmarkSimpleDoesMatchRequired(b *testing.B) {
	for b.Loop() {
	DoesMatchRequired("foo:bar:*", "foo:bar:1")
	}
	}

	func BenchmarkSimpleDoesMatchRequiredCompiled(b *testing.B) {
	r, _ := CompileMatchString("foo:bar:*")
	for b.Loop() {
	_ = r.MatchString("foo:bar:1")
	}
	}

	func BenchmarkComplexDoesMatchRequired(b *testing.B) {
	for b.Loop() {
	DoesMatchRequired("foo:*:bar:", "foo:a:b:bar:1")
	}
	}

	func BenchmarkComplexDoesMatchRequiredCompiled(b *testing.B) {
	r, _ := CompileMatchString("foo:*:bar:")
	for b.Loop() {
	_ = r.MatchString("foo:a:b:bar:1")
	}
	}

	func TestValidateMatchString(t *testing.T) {
	td := []struct {
	name string
	has string
	ok bool
	}{
	{"exact", "foo:1", true},
	{"terminal wildcard", "foo:*", true},
	{"bad wildcard", "foo:*x", false},
	{"bad wildcard", "foo:x*", false},
	{"bad wildcard", "foo*", false},
	{"bad wildcard", "foo*:", false},
	{"nothing after colon", "foo:", false},
	{"deeper terminal wildcard", "foo:bar:*", true},
	{"internal wildcard", "foo:*:bar", true},
	{"internal & terminal wildcard", "foo::bar:", true},
	{"ok spread wildcard", "foo:**:bar", true},
	{"multiple spread wildcard", "foo::bar::car", false},
	{"terminal spread wildcard", "foo:**", false},
	}

	for _, d := range td {
	t.Run(d.name, func(t *testing.T) {
	is := assert.New(t)
	err := ValidateMatchString(d.has)
	if d.ok {
	is.NoErrorf(err, d.name)
	} else {
	is.Errorf(err, d.name)
	}
	})
	}
	}