From dd84b9d64fb98746a230cd24233ff50a562c39c9 Mon Sep 17 00:00:00 2001 From: 简律纯 Date: Fri, 28 Apr 2023 01:36:44 +0800 Subject: --- cli/internal/doublestar/doublestar.go | 11 + cli/internal/doublestar/doublestar_test.go | 557 +++++++++++++++++++++++++++++ cli/internal/doublestar/glob.go | 393 ++++++++++++++++++++ cli/internal/doublestar/globwalk.go | 277 ++++++++++++++ cli/internal/doublestar/match.go | 377 +++++++++++++++++++ cli/internal/doublestar/utils.go | 71 ++++ cli/internal/doublestar/validate.go | 83 +++++ 7 files changed, 1769 insertions(+) create mode 100644 cli/internal/doublestar/doublestar.go create mode 100644 cli/internal/doublestar/doublestar_test.go create mode 100644 cli/internal/doublestar/glob.go create mode 100644 cli/internal/doublestar/globwalk.go create mode 100644 cli/internal/doublestar/match.go create mode 100644 cli/internal/doublestar/utils.go create mode 100644 cli/internal/doublestar/validate.go (limited to 'cli/internal/doublestar') diff --git a/cli/internal/doublestar/doublestar.go b/cli/internal/doublestar/doublestar.go new file mode 100644 index 0000000..6fa05f1 --- /dev/null +++ b/cli/internal/doublestar/doublestar.go @@ -0,0 +1,11 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import ( + "path" +) + +// ErrBadPattern indicates a pattern was malformed. +var ErrBadPattern = path.ErrBadPattern diff --git a/cli/internal/doublestar/doublestar_test.go b/cli/internal/doublestar/doublestar_test.go new file mode 100644 index 0000000..512f8b7 --- /dev/null +++ b/cli/internal/doublestar/doublestar_test.go @@ -0,0 +1,557 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// This file is mostly copied from Go's path/match_test.go + +package doublestar + +import ( + "io/fs" + "log" + "os" + "path" + "path/filepath" + "runtime" + "strings" + "testing" +) + +type MatchTest struct { + pattern, testPath string // a pattern and path to test the pattern on + shouldMatch bool // true if the pattern should match the path + expectedErr error // an expected error + isStandard bool // pattern doesn't use any doublestar features + testOnDisk bool // true: test pattern against files in "test" directory + numResults int // number of glob results if testing on disk + winNumResults int // number of glob results on Windows +} + +// Tests which contain escapes and symlinks will not work on Windows +var onWindows = runtime.GOOS == "windows" + +var matchTests = []MatchTest{ + {"*", "", true, nil, true, false, 0, 0}, + {"*", "/", false, nil, true, false, 0, 0}, + {"/*", "/", true, nil, true, false, 0, 0}, + {"/*", "/debug/", false, nil, true, false, 0, 0}, + {"/*", "//", false, nil, true, false, 0, 0}, + {"abc", "abc", true, nil, true, true, 1, 1}, + {"*", "abc", true, nil, true, true, 19, 15}, + {"*c", "abc", true, nil, true, true, 2, 2}, + {"*/", "a/", true, nil, true, false, 0, 0}, + {"a*", "a", true, nil, true, true, 9, 9}, + {"a*", "abc", true, nil, true, true, 9, 9}, + {"a*", "ab/c", false, nil, true, true, 9, 9}, + {"a*/b", "abc/b", true, nil, true, true, 2, 2}, + {"a*/b", "a/c/b", false, nil, true, true, 2, 2}, + {"a*b*c*d*e*", "axbxcxdxe", true, nil, true, true, 3, 3}, + {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil, true, true, 2, 2}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil, true, true, 2, 2}, + {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil, true, true, 2, 2}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil, true, true, 2, 2}, + {"a*b?c*x", "abxbbxdbxebxczzx", true, nil, true, true, 2, 2}, + {"a*b?c*x", "abxbbxdbxebxczzy", false, nil, true, true, 2, 2}, + {"ab[c]", "abc", true, nil, true, true, 1, 1}, + {"ab[b-d]", "abc", true, nil, true, true, 1, 1}, + {"ab[e-g]", "abc", false, nil, true, true, 0, 0}, + {"ab[^c]", "abc", false, nil, true, true, 0, 0}, + {"ab[^b-d]", "abc", false, nil, true, true, 0, 0}, + {"ab[^e-g]", "abc", true, nil, true, true, 1, 1}, + {"a\\*b", "ab", false, nil, true, true, 0, 0}, + {"a?b", "a☺b", true, nil, true, true, 1, 1}, + {"a[^a]b", "a☺b", true, nil, true, true, 1, 1}, + {"a[!a]b", "a☺b", true, nil, false, true, 1, 1}, + {"a???b", "a☺b", false, nil, true, true, 0, 0}, + {"a[^a][^a][^a]b", "a☺b", false, nil, true, true, 0, 0}, + {"[a-ζ]*", "α", true, nil, true, true, 17, 15}, + {"*[a-ζ]", "A", false, nil, true, true, 17, 15}, + {"a?b", "a/b", false, nil, true, true, 1, 1}, + {"a*b", "a/b", false, nil, true, true, 1, 1}, + {"[\\]a]", "]", true, nil, true, !onWindows, 2, 2}, + {"[\\-]", "-", true, nil, true, !onWindows, 1, 1}, + {"[x\\-]", "x", true, nil, true, !onWindows, 2, 2}, + {"[x\\-]", "-", true, nil, true, !onWindows, 2, 2}, + {"[x\\-]", "z", false, nil, true, !onWindows, 2, 2}, + {"[\\-x]", "x", true, nil, true, !onWindows, 2, 2}, + {"[\\-x]", "-", true, nil, true, !onWindows, 2, 2}, + {"[\\-x]", "a", false, nil, true, !onWindows, 2, 2}, + {"[]a]", "]", false, ErrBadPattern, true, true, 0, 0}, + // doublestar, like bash, allows these when path.Match() does not + {"[-]", "-", true, nil, false, !onWindows, 1, 0}, + {"[x-]", "x", true, nil, false, true, 2, 1}, + {"[x-]", "-", true, nil, false, !onWindows, 2, 1}, + {"[x-]", "z", false, nil, false, true, 2, 1}, + {"[-x]", "x", true, nil, false, true, 2, 1}, + {"[-x]", "-", true, nil, false, !onWindows, 2, 1}, + {"[-x]", "a", false, nil, false, true, 2, 1}, + {"[a-b-d]", "a", true, nil, false, true, 3, 2}, + {"[a-b-d]", "b", true, nil, false, true, 3, 2}, + {"[a-b-d]", "-", true, nil, false, !onWindows, 3, 2}, + {"[a-b-d]", "c", false, nil, false, true, 3, 2}, + {"[a-b-x]", "x", true, nil, false, true, 4, 3}, + {"\\", "a", false, ErrBadPattern, true, !onWindows, 0, 0}, + {"[", "a", false, ErrBadPattern, true, true, 0, 0}, + {"[^", "a", false, ErrBadPattern, true, true, 0, 0}, + {"[^bc", "a", false, ErrBadPattern, true, true, 0, 0}, + {"a[", "a", false, ErrBadPattern, true, true, 0, 0}, + {"a[", "ab", false, ErrBadPattern, true, true, 0, 0}, + {"ad[", "ab", false, ErrBadPattern, true, true, 0, 0}, + {"*x", "xxx", true, nil, true, true, 4, 4}, + {"[abc]", "b", true, nil, true, true, 3, 3}, + {"**", "", true, nil, false, false, 38, 38}, + {"a/**", "a", true, nil, false, true, 7, 7}, + {"a/**", "a/", true, nil, false, false, 7, 7}, + {"a/**", "a/b", true, nil, false, true, 7, 7}, + {"a/**", "a/b/c", true, nil, false, true, 7, 7}, + // These tests differ since we've disabled walking symlinks + {"**/c", "c", true, nil, false, true, 4, 4}, + {"**/c", "b/c", true, nil, false, true, 4, 4}, + {"**/c", "a/b/c", true, nil, false, true, 4, 4}, + {"**/c", "a/b", false, nil, false, true, 4, 4}, + {"**/c", "abcd", false, nil, false, true, 4, 4}, + {"**/c", "a/abc", false, nil, false, true, 4, 4}, + {"a/**/b", "a/b", true, nil, false, true, 2, 2}, + {"a/**/c", "a/b/c", true, nil, false, true, 2, 2}, + {"a/**/d", "a/b/c/d", true, nil, false, true, 1, 1}, + {"a/\\**", "a/b/c", false, nil, false, !onWindows, 0, 0}, + {"a/\\[*\\]", "a/bc", false, nil, true, !onWindows, 0, 0}, + // this is an odd case: filepath.Glob() will return results + {"a//b/c", "a/b/c", false, nil, true, false, 0, 0}, + {"a/b/c", "a/b//c", false, nil, true, true, 1, 1}, + // also odd: Glob + filepath.Glob return results + {"a/", "a", false, nil, true, false, 0, 0}, + {"ab{c,d}", "abc", true, nil, false, true, 1, 1}, + {"ab{c,d,*}", "abcde", true, nil, false, true, 5, 5}, + {"ab{c,d}[", "abcd", false, ErrBadPattern, false, true, 0, 0}, + {"a{,bc}", "a", true, nil, false, true, 2, 2}, + {"a{,bc}", "abc", true, nil, false, true, 2, 2}, + {"a/{b/c,c/b}", "a/b/c", true, nil, false, true, 2, 2}, + {"a/{b/c,c/b}", "a/c/b", true, nil, false, true, 2, 2}, + {"{a/{b,c},abc}", "a/b", true, nil, false, true, 3, 3}, + {"{a/{b,c},abc}", "a/c", true, nil, false, true, 3, 3}, + {"{a/{b,c},abc}", "abc", true, nil, false, true, 3, 3}, + {"{a/{b,c},abc}", "a/b/c", false, nil, false, true, 3, 3}, + {"{a/ab*}", "a/abc", true, nil, false, true, 1, 1}, + {"{a/*}", "a/b", true, nil, false, true, 3, 3}, + {"{a/abc}", "a/abc", true, nil, false, true, 1, 1}, + {"{a/b,a/c}", "a/c", true, nil, false, true, 2, 2}, + {"abc/**", "abc/b", true, nil, false, true, 3, 3}, + {"**/abc", "abc", true, nil, false, true, 2, 2}, + {"abc**", "abc/b", false, nil, false, true, 3, 3}, + {"**/*.txt", "abc/【test】.txt", true, nil, false, true, 1, 1}, + {"**/【*", "abc/【test】.txt", true, nil, false, true, 1, 1}, + // unfortunately, io/fs can't handle this, so neither can Glob =( + {"broken-symlink", "broken-symlink", true, nil, true, false, 1, 1}, + // We don't care about matching a particular file, we want to verify + // that we don't traverse the symlink + {"working-symlink/c/*", "working-symlink/c/d", true, nil, true, !onWindows, 1, 1}, + {"working-sym*/*", "irrelevant", false, nil, false, !onWindows, 0, 0}, + {"b/**/f", "irrelevant", false, nil, false, !onWindows, 0, 0}, +} + +func TestValidatePattern(t *testing.T) { + for idx, tt := range matchTests { + testValidatePatternWith(t, idx, tt) + } +} + +func testValidatePatternWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Validate(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() + + result := ValidatePattern(tt.pattern) + if result != (tt.expectedErr == nil) { + t.Errorf("#%v. ValidatePattern(%#q) = %v want %v", idx, tt.pattern, result, !result) + } +} + +func TestMatch(t *testing.T) { + for idx, tt := range matchTests { + // Since Match() always uses "/" as the separator, we + // don't need to worry about the tt.testOnDisk flag + testMatchWith(t, idx, tt) + } +} + +func testMatchWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r) + } + }() + + // Match() always uses "/" as the separator + ok, err := Match(tt.pattern, tt.testPath) + if ok != tt.shouldMatch || err != tt.expectedErr { + t.Errorf("#%v. Match(%#q, %#q) = %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, tt.shouldMatch, tt.expectedErr) + } + + if tt.isStandard { + stdOk, stdErr := path.Match(tt.pattern, tt.testPath) + if ok != stdOk || !compareErrors(err, stdErr) { + t.Errorf("#%v. Match(%#q, %#q) != path.Match(...). Got %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, stdOk, stdErr) + } + } +} + +func BenchmarkMatch(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard { + _, _ = Match(tt.pattern, tt.testPath) + } + } + } +} + +func BenchmarkGoMatch(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard { + _, _ = path.Match(tt.pattern, tt.testPath) + } + } + } +} + +func TestPathMatch(t *testing.T) { + for idx, tt := range matchTests { + // Even though we aren't actually matching paths on disk, we are using + // PathMatch() which will use the system's separator. As a result, any + // patterns that might cause problems on-disk need to also be avoided + // here in this test. + if tt.testOnDisk { + testPathMatchWith(t, idx, tt) + } + } +} + +func testPathMatchWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r) + } + }() + + pattern := filepath.FromSlash(tt.pattern) + testPath := filepath.FromSlash(tt.testPath) + ok, err := PathMatch(pattern, testPath) + if ok != tt.shouldMatch || err != tt.expectedErr { + t.Errorf("#%v. PathMatch(%#q, %#q) = %v, %v want %v, %v", idx, pattern, testPath, ok, err, tt.shouldMatch, tt.expectedErr) + } + + if tt.isStandard { + stdOk, stdErr := filepath.Match(pattern, testPath) + if ok != stdOk || !compareErrors(err, stdErr) { + t.Errorf("#%v. PathMatch(%#q, %#q) != filepath.Match(...). Got %v, %v want %v, %v", idx, pattern, testPath, ok, err, stdOk, stdErr) + } + } +} + +func TestPathMatchFake(t *testing.T) { + // This test fakes that our path separator is `\\` so we can test what it + // would be like on Windows - obviously, we don't need to do that if we + // actually _are_ on Windows, since TestPathMatch will cover it. + if onWindows { + return + } + + for idx, tt := range matchTests { + // Even though we aren't actually matching paths on disk, we are using + // PathMatch() which will use the system's separator. As a result, any + // patterns that might cause problems on-disk need to also be avoided + // here in this test. + if tt.testOnDisk && tt.pattern != "\\" { + testPathMatchFakeWith(t, idx, tt) + } + } +} + +func testPathMatchFakeWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r) + } + }() + + pattern := strings.ReplaceAll(tt.pattern, "/", "\\") + testPath := strings.ReplaceAll(tt.testPath, "/", "\\") + ok, err := matchWithSeparator(pattern, testPath, '\\', true) + if ok != tt.shouldMatch || err != tt.expectedErr { + t.Errorf("#%v. PathMatch(%#q, %#q) = %v, %v want %v, %v", idx, pattern, testPath, ok, err, tt.shouldMatch, tt.expectedErr) + } +} + +func BenchmarkPathMatch(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + pattern := filepath.FromSlash(tt.pattern) + testPath := filepath.FromSlash(tt.testPath) + _, _ = PathMatch(pattern, testPath) + } + } + } +} + +func BenchmarkGoPathMatch(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + pattern := filepath.FromSlash(tt.pattern) + testPath := filepath.FromSlash(tt.testPath) + _, _ = filepath.Match(pattern, testPath) + } + } + } +} + +func TestGlob(t *testing.T) { + fsys := os.DirFS("test") + for idx, tt := range matchTests { + if tt.testOnDisk { + testGlobWith(t, idx, tt, fsys) + } + } +} + +func testGlobWith(t *testing.T, idx int, tt MatchTest, fsys fs.FS) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Glob(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() + + matches, err := Glob(fsys, tt.pattern) + verifyGlobResults(t, idx, "Glob", tt, fsys, matches, err) +} + +func TestGlobWalk(t *testing.T) { + fsys := os.DirFS("test") + for idx, tt := range matchTests { + if tt.testOnDisk { + testGlobWalkWith(t, idx, tt, fsys) + } + } +} + +func testGlobWalkWith(t *testing.T, idx int, tt MatchTest, fsys fs.FS) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Glob(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() + + var matches []string + err := GlobWalk(fsys, tt.pattern, func(p string, d fs.DirEntry) error { + matches = append(matches, p) + return nil + }) + verifyGlobResults(t, idx, "GlobWalk", tt, fsys, matches, err) +} + +func verifyGlobResults(t *testing.T, idx int, fn string, tt MatchTest, fsys fs.FS, matches []string, err error) { + numResults := tt.numResults + if onWindows { + numResults = tt.winNumResults + } + if len(matches) != numResults { + t.Errorf("#%v. %v(%#q) = %#v - should have %#v results", idx, fn, tt.pattern, matches, tt.numResults) + } + if inSlice(tt.testPath, matches) != tt.shouldMatch { + if tt.shouldMatch { + t.Errorf("#%v. %v(%#q) = %#v - doesn't contain %v, but should", idx, fn, tt.pattern, matches, tt.testPath) + } else { + t.Errorf("#%v. %v(%#q) = %#v - contains %v, but shouldn't", idx, fn, tt.pattern, matches, tt.testPath) + } + } + if err != tt.expectedErr { + t.Errorf("#%v. %v(%#q) has error %v, but should be %v", idx, fn, tt.pattern, err, tt.expectedErr) + } + + if tt.isStandard { + stdMatches, stdErr := fs.Glob(fsys, tt.pattern) + if !compareSlices(matches, stdMatches) || !compareErrors(err, stdErr) { + t.Errorf("#%v. %v(%#q) != fs.Glob(...). Got %#v, %v want %#v, %v", idx, fn, tt.pattern, matches, err, stdMatches, stdErr) + } + } +} + +func BenchmarkGlob(b *testing.B) { + fsys := os.DirFS("test") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + _, _ = Glob(fsys, tt.pattern) + } + } + } +} + +func BenchmarkGlobWalk(b *testing.B) { + fsys := os.DirFS("test") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + _ = GlobWalk(fsys, tt.pattern, func(p string, d fs.DirEntry) error { + return nil + }) + } + } + } +} + +func BenchmarkGoGlob(b *testing.B) { + fsys := os.DirFS("test") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + for _, tt := range matchTests { + if tt.isStandard && tt.testOnDisk { + _, _ = fs.Glob(fsys, tt.pattern) + } + } + } +} + +func compareErrors(a, b error) bool { + if a == nil { + return b == nil + } + return b != nil +} + +func inSlice(s string, a []string) bool { + for _, i := range a { + if i == s { + return true + } + } + return false +} + +func compareSlices(a, b []string) bool { + if len(a) != len(b) { + return false + } + + diff := make(map[string]int, len(a)) + + for _, x := range a { + diff[x]++ + } + + for _, y := range b { + if _, ok := diff[y]; !ok { + return false + } + + diff[y]-- + if diff[y] == 0 { + delete(diff, y) + } + } + + return len(diff) == 0 +} + +func mkdirp(parts ...string) { + dirs := path.Join(parts...) + err := os.MkdirAll(dirs, 0755) + if err != nil { + log.Fatalf("Could not create test directories %v: %v\n", dirs, err) + } +} + +func touch(parts ...string) { + filename := path.Join(parts...) + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create test file %v: %v\n", filename, err) + } + _ = f.Close() +} + +func symlink(oldname, newname string) { + // since this will only run on non-windows, we can assume "/" as path separator + err := os.Symlink(oldname, newname) + if err != nil && !os.IsExist(err) { + log.Fatalf("Could not create symlink %v -> %v: %v\n", oldname, newname, err) + } +} + +func TestGlobSorted(t *testing.T) { + fsys := os.DirFS("test") + expected := []string{"a", "abc", "abcd", "abcde", "abxbbxdbxebxczzx", "abxbbxdbxebxczzy", "axbxcxdxe", "axbxcxdxexxx", "a☺b"} + matches, err := Glob(fsys, "a*") + if err != nil { + t.Errorf("Unexpected error %v", err) + return + } + + if len(matches) != len(expected) { + t.Errorf("Glob returned %#v; expected %#v", matches, expected) + return + } + for idx, match := range matches { + if match != expected[idx] { + t.Errorf("Glob returned %#v; expected %#v", matches, expected) + return + } + } +} + +func TestMain(m *testing.M) { + // create the test directory + mkdirp("test", "a", "b", "c") + mkdirp("test", "a", "c") + mkdirp("test", "abc") + mkdirp("test", "axbxcxdxe", "xxx") + mkdirp("test", "axbxcxdxexxx") + mkdirp("test", "b") + + // create test files + touch("test", "a", "abc") + touch("test", "a", "b", "c", "d") + touch("test", "a", "c", "b") + touch("test", "abc", "b") + touch("test", "abcd") + touch("test", "abcde") + touch("test", "abxbbxdbxebxczzx") + touch("test", "abxbbxdbxebxczzy") + touch("test", "axbxcxdxe", "f") + touch("test", "axbxcxdxe", "xxx", "f") + touch("test", "axbxcxdxexxx", "f") + touch("test", "axbxcxdxexxx", "fff") + touch("test", "a☺b") + touch("test", "b", "c") + touch("test", "c") + touch("test", "x") + touch("test", "xxx") + touch("test", "z") + touch("test", "α") + touch("test", "abc", "【test】.txt") + + if !onWindows { + // these files/symlinks won't work on Windows + touch("test", "-") + touch("test", "]") + symlink("../axbxcxdxe/", "test/b/symlink-dir") + symlink("/tmp/nonexistant-file-20160902155705", "test/broken-symlink") + symlink("a/b", "test/working-symlink") + } + + // os.Exit(m.Run()) + exitCode := m.Run() + _ = os.RemoveAll("test") + os.Exit(exitCode) +} diff --git a/cli/internal/doublestar/glob.go b/cli/internal/doublestar/glob.go new file mode 100644 index 0000000..eee8920 --- /dev/null +++ b/cli/internal/doublestar/glob.go @@ -0,0 +1,393 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import ( + "io/fs" + "path" +) + +// Glob returns the names of all files matching pattern or nil if there is no +// matching file. The syntax of pattern is the same as in Match(). The pattern +// may describe hierarchical names such as usr/*/bin/ed. +// +// Glob ignores file system errors such as I/O errors reading directories. +// The only possible returned error is ErrBadPattern, reporting that the +// pattern is malformed. +// +// Note: this is meant as a drop-in replacement for io/fs.Glob(). Like +// io/fs.Glob(), this function assumes that your pattern uses `/` as the path +// separator even if that's not correct for your OS (like Windows). If you +// aren't sure if that's the case, you can use filepath.ToSlash() on your +// pattern before calling Glob(). +// +// Like `io/fs.Glob()`, patterns containing `/./`, `/../`, or starting with `/` +// will return no results and no errors. You can use SplitPattern to divide a +// pattern into a base path (to initialize an `FS` object) and pattern. +func Glob(fsys fs.FS, pattern string) ([]string, error) { + if !ValidatePattern(pattern) { + return nil, ErrBadPattern + } + if hasMidDoubleStar(pattern) { + // If the pattern has a `**` anywhere but the very end, GlobWalk is more + // performant because it can get away with less allocations. If the pattern + // ends in a `**`, both methods are pretty much the same, but Glob has a + // _very_ slight advantage because of lower function call overhead. + var matches []string + err := doGlobWalk(fsys, pattern, true, func(p string, d fs.DirEntry) error { + matches = append(matches, p) + return nil + }) + return matches, err + } + return doGlob(fsys, pattern, nil, true) +} + +// Does the actual globbin' +func doGlob(fsys fs.FS, pattern string, m []string, firstSegment bool) ([]string, error) { + matches := m + patternStart := indexMeta(pattern) + if patternStart == -1 { + // pattern doesn't contain any meta characters - does a file matching the + // pattern exist? + if exists(fsys, pattern) { + matches = append(matches, pattern) + } + return matches, nil + } + + dir := "." + splitIdx := lastIndexSlashOrAlt(pattern) + if splitIdx != -1 { + if pattern[splitIdx] == '}' { + openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) + if openingIdx == -1 { + // if there's no matching opening index, technically Match() will treat + // an unmatched `}` as nothing special, so... we will, too! + splitIdx = lastIndexSlash(pattern[:splitIdx]) + } else { + // otherwise, we have to handle the alts: + return globAlts(fsys, pattern, openingIdx, splitIdx, matches, firstSegment) + } + } + + dir = pattern[:splitIdx] + pattern = pattern[splitIdx+1:] + } + + // if `splitIdx` is less than `patternStart`, we know `dir` has no meta + // characters. They would be equal if they are both -1, which means `dir` + // will be ".", and we know that doesn't have meta characters either. + if splitIdx <= patternStart { + return globDir(fsys, dir, pattern, matches, firstSegment) + } + + var dirs []string + var err error + dirs, err = doGlob(fsys, dir, matches, false) + if err != nil { + return nil, err + } + for _, d := range dirs { + matches, err = globDir(fsys, d, pattern, matches, firstSegment) + if err != nil { + return nil, err + } + } + + return matches, nil +} + +// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the +// indexes of `{` and `}`, respectively +func globAlts(fsys fs.FS, pattern string, openingIdx, closingIdx int, m []string, firstSegment bool) ([]string, error) { + matches := m + + var dirs []string + startIdx := 0 + afterIdx := closingIdx + 1 + splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) + if splitIdx == -1 || pattern[splitIdx] == '}' { + // no common prefix + dirs = []string{""} + } else { + // our alts have a common prefix that we can process first + var err error + dirs, err = doGlob(fsys, pattern[:splitIdx], matches, false) + if err != nil { + return nil, err + } + + startIdx = splitIdx + 1 + } + + for _, d := range dirs { + patIdx := openingIdx + 1 + altResultsStartIdx := len(matches) + thisResultStartIdx := altResultsStartIdx + for patIdx < closingIdx { + nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) + if nextIdx == -1 { + nextIdx = closingIdx + } else { + nextIdx += patIdx + } + + alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) + var err error + matches, err = doGlob(fsys, alt, matches, firstSegment) + if err != nil { + return nil, err + } + + matchesLen := len(matches) + if altResultsStartIdx != thisResultStartIdx && thisResultStartIdx != matchesLen { + // Alts can result in matches that aren't sorted, or, worse, duplicates + // (consider the trivial pattern `path/to/{a,*}`). Since doGlob returns + // sorted results, we can do a sort of in-place merge and remove + // duplicates. But, we only need to do this if this isn't the first alt + // (ie, `altResultsStartIdx != thisResultsStartIdx`) and if the latest + // alt actually added some matches (`thisResultStartIdx != + // len(matches)`) + matches = sortAndRemoveDups(matches, altResultsStartIdx, thisResultStartIdx, matchesLen) + + // length of matches may have changed + thisResultStartIdx = len(matches) + } else { + thisResultStartIdx = matchesLen + } + + patIdx = nextIdx + 1 + } + } + + return matches, nil +} + +// find files/subdirectories in the given `dir` that match `pattern` +func globDir(fsys fs.FS, dir, pattern string, matches []string, canMatchFiles bool) ([]string, error) { + m := matches + + if pattern == "" { + // pattern can be an empty string if the original pattern ended in a slash, + // in which case, we should just return dir, but only if it actually exists + // and it's a directory (or a symlink to a directory) + if isPathDir(fsys, dir) { + m = append(m, dir) + } + return m, nil + } + + if pattern == "**" { + m = globDoubleStar(fsys, dir, m, canMatchFiles) + return m, nil + } + + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return m, nil + } + + var matched bool + for _, info := range dirs { + name := info.Name() + if canMatchFiles || isDir(fsys, dir, name, info) { + matched, err = matchWithSeparator(pattern, name, '/', false) + if err != nil { + return nil, err + } + if matched { + m = append(m, path.Join(dir, name)) + } + } + } + + return m, nil +} + +func globDoubleStar(fsys fs.FS, dir string, matches []string, canMatchFiles bool) []string { + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return matches + } + + // `**` can match *this* dir, so add it + matches = append(matches, dir) + for _, info := range dirs { + name := info.Name() + if isDir(fsys, dir, name, info) { + matches = globDoubleStar(fsys, path.Join(dir, name), matches, canMatchFiles) + } else if canMatchFiles { + matches = append(matches, path.Join(dir, name)) + } + } + + return matches +} + +// Returns true if the pattern has a doublestar in the middle of the pattern. +// In this case, GlobWalk is faster because it can get away with less +// allocations. However, Glob has a _very_ slight edge if the pattern ends in +// `**`. +func hasMidDoubleStar(p string) bool { + // subtract 3: 2 because we want to return false if the pattern ends in `**` + // (Glob is _very_ slightly faster in that case), and the extra 1 because our + // loop checks p[i] and p[i+1]. + l := len(p) - 3 + for i := 0; i < l; i++ { + if p[i] == '\\' { + // escape next byte + i++ + } else if p[i] == '*' && p[i+1] == '*' { + return true + } + } + return false +} + +// Returns the index of the first unescaped meta character, or negative 1. +func indexMeta(s string) int { + var c byte + l := len(s) + for i := 0; i < l; i++ { + c = s[i] + if c == '*' || c == '?' || c == '[' || c == '{' { + return i + } else if c == '\\' { + // skip next byte + i++ + } + } + return -1 +} + +// Returns the index of the last unescaped slash or closing alt (`}`) in the +// string, or negative 1. +func lastIndexSlashOrAlt(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if (s[i] == '/' || s[i] == '}') && (i == 0 || s[i-1] != '\\') { + return i + } + } + return -1 +} + +// Returns the index of the last unescaped slash in the string, or negative 1. +func lastIndexSlash(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '/' && (i == 0 || s[i-1] != '\\') { + return i + } + } + return -1 +} + +// Assuming the byte after the end of `s` is a closing `}`, this function will +// find the index of the matching `{`. That is, it'll skip over any nested `{}` +// and account for escaping. +func indexMatchedOpeningAlt(s string) int { + alts := 1 + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '}' && (i == 0 || s[i-1] != '\\') { + alts++ + } else if s[i] == '{' && (i == 0 || s[i-1] != '\\') { + if alts--; alts == 0 { + return i + } + } + } + return -1 +} + +// Returns true if the path exists +func exists(fsys fs.FS, name string) bool { + if _, err := fs.Stat(fsys, name); err != nil { + return false + } + return true +} + +// Returns true if the path is a directory, or a symlink to a directory +func isPathDir(fsys fs.FS, name string) bool { + info, err := fs.Stat(fsys, name) + if err != nil { + return false + } + return info.IsDir() +} + +// Returns whether or not the given DirEntry is a directory. If the DirEntry +// represents a symbolic link, return false +func isDir(fsys fs.FS, dir string, name string, info fs.DirEntry) bool { + if (info.Type() & fs.ModeSymlink) > 0 { + return false + } + return info.IsDir() +} + +// Builds a string from an alt +func buildAlt(prefix, pattern string, startIdx, openingIdx, currentIdx, nextIdx, afterIdx int) string { + // pattern: + // ignored/start{alts,go,here}remaining - len = 36 + // | | | | ^--- afterIdx = 27 + // | | | \--------- nextIdx = 21 + // | | \----------- currentIdx = 19 + // | \----------------- openingIdx = 13 + // \---------------------- startIdx = 8 + // + // result: + // prefix/startgoremaining - len = 7 + 5 + 2 + 9 = 23 + var buf []byte + patLen := len(pattern) + size := (openingIdx - startIdx) + (nextIdx - currentIdx) + (patLen - afterIdx) + if prefix != "" { + buf = make([]byte, 0, size+len(prefix)+1) + buf = append(buf, prefix...) + buf = append(buf, '/') + } else { + buf = make([]byte, 0, size) + } + buf = append(buf, pattern[startIdx:openingIdx]...) + buf = append(buf, pattern[currentIdx:nextIdx]...) + if afterIdx < patLen { + buf = append(buf, pattern[afterIdx:]...) + } + return string(buf) +} + +// Running alts can produce results that are not sorted, and, worse, can cause +// duplicates (consider the trivial pattern `path/to/{a,*}`). Since we know +// each run of doGlob is sorted, we can basically do the "merge" step of a +// merge sort in-place. +func sortAndRemoveDups(matches []string, idx1, idx2, l int) []string { + var tmp string + for ; idx1 < idx2; idx1++ { + if matches[idx1] < matches[idx2] { + // order is correct + continue + } else if matches[idx1] > matches[idx2] { + // need to swap and then re-sort matches above idx2 + tmp = matches[idx1] + matches[idx1] = matches[idx2] + + shft := idx2 + 1 + for ; shft < l && matches[shft] < tmp; shft++ { + matches[shft-1] = matches[shft] + } + matches[shft-1] = tmp + } else { + // duplicate - shift matches above idx2 down one and decrement l + for shft := idx2 + 1; shft < l; shft++ { + matches[shft-1] = matches[shft] + } + if l--; idx2 == l { + // nothing left to do... matches[idx2:] must have been full of dups + break + } + } + } + return matches[:l] +} diff --git a/cli/internal/doublestar/globwalk.go b/cli/internal/doublestar/globwalk.go new file mode 100644 index 0000000..6caec3e --- /dev/null +++ b/cli/internal/doublestar/globwalk.go @@ -0,0 +1,277 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import ( + "io/fs" + "path" +) + +// GlobWalkFunc is a callback function for GlobWalk(). If the function returns an error, GlobWalk +// will end immediately and return the same error. +type GlobWalkFunc func(path string, d fs.DirEntry) error + +// GlobWalk calls the callback function `fn` for every file matching pattern. +// The syntax of pattern is the same as in Match() and the behavior is the same +// as Glob(), with regard to limitations (such as patterns containing `/./`, +// `/../`, or starting with `/`). The pattern may describe hierarchical names +// such as usr/*/bin/ed. +// +// GlobWalk may have a small performance benefit over Glob if you do not need a +// slice of matches because it can avoid allocating memory for the matches. +// Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for +// each match, and lets you quit early by returning a non-nil error from your +// callback function. +// +// GlobWalk ignores file system errors such as I/O errors reading directories. +// GlobWalk may return ErrBadPattern, reporting that the pattern is malformed. +// Additionally, if the callback function `fn` returns an error, GlobWalk will +// exit immediately and return that error. +// +// Like Glob(), this function assumes that your pattern uses `/` as the path +// separator even if that's not correct for your OS (like Windows). If you +// aren't sure if that's the case, you can use filepath.ToSlash() on your +// pattern before calling GlobWalk(). +func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc) error { + if !ValidatePattern(pattern) { + return ErrBadPattern + } + return doGlobWalk(fsys, pattern, true, fn) +} + +// Actually execute GlobWalk +func doGlobWalk(fsys fs.FS, pattern string, firstSegment bool, fn GlobWalkFunc) error { + patternStart := indexMeta(pattern) + if patternStart == -1 { + // pattern doesn't contain any meta characters - does a file matching the + // pattern exist? + info, err := fs.Stat(fsys, pattern) + if err == nil { + err = fn(pattern, newDirEntryFromFileInfo(info)) + return err + } + // ignore IO errors + return nil + } + + dir := "." + splitIdx := lastIndexSlashOrAlt(pattern) + if splitIdx != -1 { + if pattern[splitIdx] == '}' { + openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) + if openingIdx == -1 { + // if there's no matching opening index, technically Match() will treat + // an unmatched `}` as nothing special, so... we will, too! + splitIdx = lastIndexSlash(pattern[:splitIdx]) + } else { + // otherwise, we have to handle the alts: + return globAltsWalk(fsys, pattern, openingIdx, splitIdx, firstSegment, fn) + } + } + + dir = pattern[:splitIdx] + pattern = pattern[splitIdx+1:] + } + + // if `splitIdx` is less than `patternStart`, we know `dir` has no meta + // characters. They would be equal if they are both -1, which means `dir` + // will be ".", and we know that doesn't have meta characters either. + if splitIdx <= patternStart { + return globDirWalk(fsys, dir, pattern, firstSegment, fn) + } + + return doGlobWalk(fsys, dir, false, func(p string, d fs.DirEntry) error { + if err := globDirWalk(fsys, p, pattern, firstSegment, fn); err != nil { + return err + } + return nil + }) +} + +// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the +// indexes of `{` and `}`, respectively +func globAltsWalk(fsys fs.FS, pattern string, openingIdx, closingIdx int, firstSegment bool, fn GlobWalkFunc) error { + var matches []dirEntryWithFullPath + startIdx := 0 + afterIdx := closingIdx + 1 + splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) + if splitIdx == -1 || pattern[splitIdx] == '}' { + // no common prefix + var err error + matches, err = doGlobAltsWalk(fsys, "", pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, matches) + if err != nil { + return err + } + } else { + // our alts have a common prefix that we can process first + startIdx = splitIdx + 1 + err := doGlobWalk(fsys, pattern[:splitIdx], false, func(p string, d fs.DirEntry) (e error) { + matches, e = doGlobAltsWalk(fsys, p, pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, matches) + return e + }) + if err != nil { + return err + } + } + + for _, m := range matches { + if err := fn(m.Path, m.Entry); err != nil { + return err + } + } + + return nil +} + +// runs actual matching for alts +func doGlobAltsWalk(fsys fs.FS, d, pattern string, startIdx, openingIdx, closingIdx, afterIdx int, firstSegment bool, m []dirEntryWithFullPath) ([]dirEntryWithFullPath, error) { + matches := m + matchesLen := len(m) + patIdx := openingIdx + 1 + for patIdx < closingIdx { + nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) + if nextIdx == -1 { + nextIdx = closingIdx + } else { + nextIdx += patIdx + } + + alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) + err := doGlobWalk(fsys, alt, firstSegment, func(p string, d fs.DirEntry) error { + // insertion sort, ignoring dups + insertIdx := matchesLen + for insertIdx > 0 && matches[insertIdx-1].Path > p { + insertIdx-- + } + if insertIdx > 0 && matches[insertIdx-1].Path == p { + // dup + return nil + } + + // append to grow the slice, then insert + entry := dirEntryWithFullPath{d, p} + matches = append(matches, entry) + for i := matchesLen; i > insertIdx; i-- { + matches[i] = matches[i-1] + } + matches[insertIdx] = entry + matchesLen++ + + return nil + }) + if err != nil { + return nil, err + } + + patIdx = nextIdx + 1 + } + + return matches, nil +} + +func globDirWalk(fsys fs.FS, dir, pattern string, canMatchFiles bool, fn GlobWalkFunc) error { + if pattern == "" { + // pattern can be an empty string if the original pattern ended in a slash, + // in which case, we should just return dir, but only if it actually exists + // and it's a directory (or a symlink to a directory) + info, err := fs.Stat(fsys, dir) + if err != nil || !info.IsDir() { + return nil + } + return fn(dir, newDirEntryFromFileInfo(info)) + } + + if pattern == "**" { + // `**` can match *this* dir + info, err := fs.Stat(fsys, dir) + if err != nil || !info.IsDir() { + return nil + } + if err = fn(dir, newDirEntryFromFileInfo(info)); err != nil { + return err + } + return globDoubleStarWalk(fsys, dir, canMatchFiles, fn) + } + + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return nil + } + + var matched bool + for _, info := range dirs { + name := info.Name() + if canMatchFiles || isDir(fsys, dir, name, info) { + matched, err = matchWithSeparator(pattern, name, '/', false) + if err != nil { + return err + } + if matched { + if err = fn(path.Join(dir, name), info); err != nil { + return err + } + } + } + } + + return nil +} + +func globDoubleStarWalk(fsys fs.FS, dir string, canMatchFiles bool, fn GlobWalkFunc) error { + dirs, err := fs.ReadDir(fsys, dir) + if err != nil { + // ignore IO errors + return nil + } + + // `**` can match *this* dir, so add it + for _, info := range dirs { + name := info.Name() + if isDir(fsys, dir, name, info) { + p := path.Join(dir, name) + if e := fn(p, info); e != nil { + return e + } + if e := globDoubleStarWalk(fsys, p, canMatchFiles, fn); e != nil { + return e + } + } else if canMatchFiles { + if e := fn(path.Join(dir, name), info); e != nil { + return e + } + } + } + + return nil +} + +type dirEntryFromFileInfo struct { + fi fs.FileInfo +} + +func (d *dirEntryFromFileInfo) Name() string { + return d.fi.Name() +} + +func (d *dirEntryFromFileInfo) IsDir() bool { + return d.fi.IsDir() +} + +func (d *dirEntryFromFileInfo) Type() fs.FileMode { + return d.fi.Mode().Type() +} + +func (d *dirEntryFromFileInfo) Info() (fs.FileInfo, error) { + return d.fi, nil +} + +func newDirEntryFromFileInfo(fi fs.FileInfo) fs.DirEntry { + return &dirEntryFromFileInfo{fi} +} + +type dirEntryWithFullPath struct { + Entry fs.DirEntry + Path string +} diff --git a/cli/internal/doublestar/match.go b/cli/internal/doublestar/match.go new file mode 100644 index 0000000..d8c9536 --- /dev/null +++ b/cli/internal/doublestar/match.go @@ -0,0 +1,377 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import ( + "path/filepath" + "unicode/utf8" +) + +// Match reports whether name matches the shell pattern. +// The pattern syntax is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence of non-path-separators +// '/**/' matches zero or more directories +// '?' matches any single non-path-separator character +// '[' [ '^' '!' ] { character-range } ']' +// character class (must be non-empty) +// starting with `^` or `!` negates the class +// '{' { term } [ ',' { term } ... ] '}' +// alternatives +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// Match returns true if `name` matches the file name `pattern`. `name` and +// `pattern` are split on forward slash (`/`) characters and may be relative or +// absolute. +// +// Match requires pattern to match all of name, not just a substring. +// The only possible returned error is ErrBadPattern, when pattern +// is malformed. +// +// A doublestar (`**`) should appear surrounded by path separators such as +// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar +// option: a pattern such as `path/to/**.txt` would return the same results as +// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. +// +// Note: this is meant as a drop-in replacement for path.Match() which +// always uses '/' as the path separator. If you want to support systems +// which use a different path separator (such as Windows), what you want +// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both +// pattern and name and then use this function. +func Match(pattern, name string) (bool, error) { + return matchWithSeparator(pattern, name, '/', true) +} + +// PathMatch returns true if `name` matches the file name `pattern`. The +// difference between Match and PathMatch is that PathMatch will automatically +// use your system's path separator to split `name` and `pattern`. On systems +// where the path separator is `'\'`, escaping will be disabled. +// +// Note: this is meant as a drop-in replacement for filepath.Match(). It +// assumes that both `pattern` and `name` are using the system's path +// separator. If you can't be sure of that, use filepath.ToSlash() on both +// `pattern` and `name`, and then use the Match() function instead. +func PathMatch(pattern, name string) (bool, error) { + return matchWithSeparator(pattern, name, filepath.Separator, true) +} + +func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) { + doublestarPatternBacktrack := -1 + doublestarNameBacktrack := -1 + starPatternBacktrack := -1 + starNameBacktrack := -1 + patIdx := 0 + nameIdx := 0 + patLen := len(pattern) + nameLen := len(name) + startOfSegment := true +MATCH: + for nameIdx < nameLen { + if patIdx < patLen { + switch pattern[patIdx] { + case '*': + if patIdx++; patIdx < patLen && pattern[patIdx] == '*' { + // doublestar - must begin with a path separator, otherwise we'll + // treat it like a single star like bash + patIdx++ + if startOfSegment { + if patIdx >= patLen { + // pattern ends in `/**`: return true + return true, nil + } + + // doublestar must also end with a path separator, otherwise we're + // just going to treat the doublestar as a single star like bash + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + if patRune == separator { + patIdx += patRuneLen + + doublestarPatternBacktrack = patIdx + doublestarNameBacktrack = nameIdx + starPatternBacktrack = -1 + starNameBacktrack = -1 + continue + } + } + } + startOfSegment = false + + starPatternBacktrack = patIdx + starNameBacktrack = nameIdx + continue + + case '?': + startOfSegment = false + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if nameRune == separator { + // `?` cannot match the separator + break + } + + patIdx++ + nameIdx += nameRuneLen + continue + + case '[': + startOfSegment = false + if patIdx++; patIdx >= patLen { + // class didn't end + return false, ErrBadPattern + } + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + + matched := false + negate := pattern[patIdx] == '!' || pattern[patIdx] == '^' + if negate { + patIdx++ + } + + if patIdx >= patLen || pattern[patIdx] == ']' { + // class didn't end or empty character class + return false, ErrBadPattern + } + + last := utf8.MaxRune + for patIdx < patLen && pattern[patIdx] != ']' { + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + // match a range + if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' { + if pattern[patIdx] == '\\' { + // next character is escaped + patIdx++ + } + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + if last <= nameRune && nameRune <= patRune { + matched = true + break + } + + // didn't match range - reset `last` + last = utf8.MaxRune + continue + } + + // not a range - check if the next rune is escaped + if patRune == '\\' { + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + } + + // check if the rune matches + if patRune == nameRune { + matched = true + break + } + + // no matches yet + last = patRune + } + + if matched == negate { + // failed to match - if we reached the end of the pattern, that means + // we never found a closing `]` + if patIdx >= patLen { + return false, ErrBadPattern + } + break + } + + closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true) + if closingIdx == -1 { + // no closing `]` + return false, ErrBadPattern + } + + patIdx += closingIdx + 1 + nameIdx += nameRuneLen + continue + + case '{': + // Note: removed 'startOfSegment = false' here. + // This block is guaranteed to return, so assigning it was useless + // and triggering a lint error + patIdx++ + closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\') + if closingIdx == -1 { + // no closing `}` + return false, ErrBadPattern + } + closingIdx += patIdx + + for { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } + commaIdx += patIdx + + result, err := matchWithSeparator(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name[nameIdx:], separator, validate) + if result || err != nil { + return result, err + } + + patIdx = commaIdx + 1 + } + return matchWithSeparator(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name[nameIdx:], separator, validate) + + case '\\': + if separator != '\\' { + // next rune is "escaped" in the pattern - literal match + if patIdx++; patIdx >= patLen { + // pattern ended + return false, ErrBadPattern + } + } + fallthrough + + default: + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if patRune != nameRune { + if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' { + // if this rune was meant to be escaped, we need to move patIdx + // back to the backslash before backtracking or validating below + patIdx-- + } + break + } + + patIdx += patRuneLen + nameIdx += nameRuneLen + startOfSegment = patRune == separator + continue + } + } + + if starPatternBacktrack >= 0 { + // `*` backtrack, but only if the `name` rune isn't the separator + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:]) + if nameRune != separator { + starNameBacktrack += nameRuneLen + patIdx = starPatternBacktrack + nameIdx = starNameBacktrack + startOfSegment = false + continue + } + } + + if doublestarPatternBacktrack >= 0 { + // `**` backtrack, advance `name` past next separator + nameIdx = doublestarNameBacktrack + for nameIdx < nameLen { + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + nameIdx += nameRuneLen + if nameRune == separator { + doublestarNameBacktrack = nameIdx + patIdx = doublestarPatternBacktrack + startOfSegment = true + continue MATCH + } + } + } + + if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) { + return false, ErrBadPattern + } + return false, nil + } + + if nameIdx < nameLen { + // we reached the end of `pattern` before the end of `name` + return false, nil + } + + // we've reached the end of `name`; we've successfully matched if we've also + // reached the end of `pattern`, or if the rest of `pattern` can match a + // zero-length string + return isZeroLengthPattern(pattern[patIdx:], separator) +} + +func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) { + // `/**` is a special case - a pattern such as `path/to/a/**` *should* match + // `path/to/a` because `a` might be a directory + if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" { + return true, nil + } + + if pattern[0] == '{' { + closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\') + if closingIdx == -1 { + // no closing '}' + return false, ErrBadPattern + } + closingIdx++ + + patIdx := 1 + for { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } + commaIdx += patIdx + + ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator) + if ret || err != nil { + return + } + + patIdx = commaIdx + 1 + } + return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator) + } + + // no luck - validate the rest of the pattern + if !doValidatePattern(pattern, separator) { + return false, ErrBadPattern + } + return false, nil +} + +// Finds the index of the first unescaped byte `c`, or negative 1. +func indexUnescapedByte(s string, c byte, allowEscaping bool) int { + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == c { + return i + } + } + return -1 +} + +// Assuming the byte before the beginning of `s` is an opening `{`, this +// function will find the index of the matching `}`. That is, it'll skip over +// any nested `{}` and account for escaping +func indexMatchedClosingAlt(s string, allowEscaping bool) int { + alts := 1 + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == '{' { + alts++ + } else if s[i] == '}' { + if alts--; alts == 0 { + return i + } + } + } + return -1 +} diff --git a/cli/internal/doublestar/utils.go b/cli/internal/doublestar/utils.go new file mode 100644 index 0000000..7236cd0 --- /dev/null +++ b/cli/internal/doublestar/utils.go @@ -0,0 +1,71 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +// SplitPattern is a utility function. Given a pattern, SplitPattern will +// return two strings: the first string is everything up to the last slash +// (`/`) that appears _before_ any unescaped "meta" characters (ie, `*?[{`). +// The second string is everything after that slash. For example, given the +// pattern: +// +// ../../path/to/meta*/** +// ^----------- split here +// +// SplitPattern returns "../../path/to" and "meta*/**". This is useful for +// initializing os.DirFS() to call Glob() because Glob() will silently fail if +// your pattern includes `/./` or `/../`. For example: +// +// base, pattern := SplitPattern("../../path/to/meta*/**") +// fsys := os.DirFS(base) +// matches, err := Glob(fsys, pattern) +// +// If SplitPattern cannot find somewhere to split the pattern (for example, +// `meta*/**`), it will return "." and the unaltered pattern (`meta*/**` in +// this example). +// +// Of course, it is your responsibility to decide if the returned base path is +// "safe" in the context of your application. Perhaps you could use Match() to +// validate against a list of approved base directories? +func SplitPattern(p string) (string, string) { + base := "." + pattern := p + + splitIdx := -1 + for i := 0; i < len(p); i++ { + c := p[i] + if c == '\\' { + i++ + } else if c == '/' { + splitIdx = i + } else if c == '*' || c == '?' || c == '[' || c == '{' { + break + } + } + + if splitIdx >= 0 { + return p[:splitIdx], p[splitIdx+1:] + } + + return base, pattern +} + +// Finds the next comma, but ignores any commas that appear inside nested `{}`. +// Assumes that each opening bracket has a corresponding closing bracket. +func indexNextAlt(s string, allowEscaping bool) int { + alts := 1 + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == '{' { + alts++ + } else if s[i] == '}' { + alts-- + } else if s[i] == ',' && alts == 1 { + return i + } + } + return -1 +} diff --git a/cli/internal/doublestar/validate.go b/cli/internal/doublestar/validate.go new file mode 100644 index 0000000..225fc5e --- /dev/null +++ b/cli/internal/doublestar/validate.go @@ -0,0 +1,83 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import "path/filepath" + +// ValidatePattern validates a pattern. Patterns are validated while they run in Match(), +// PathMatch(), and Glob(), so, you normally wouldn't need to call this. +// However, there are cases where this might be useful: for example, if your +// program allows a user to enter a pattern that you'll run at a later time, +// you might want to validate it. +// +// ValidatePattern assumes your pattern uses '/' as the path separator. +func ValidatePattern(s string) bool { + return doValidatePattern(s, '/') +} + +// ValidatePathPattern only uses your OS path separator. In other words, use +// ValidatePattern if you would normally use Match() or Glob(). Use +// ValidatePathPattern if you would normally use PathMatch(). Keep in mind, +// Glob() requires '/' separators, even if your OS uses something else. +func ValidatePathPattern(s string) bool { + return doValidatePattern(s, filepath.Separator) +} + +func doValidatePattern(s string, separator rune) bool { + altDepth := 0 + l := len(s) +VALIDATE: + for i := 0; i < l; i++ { + switch s[i] { + case '\\': + if separator != '\\' { + // skip the next byte - return false if there is no next byte + if i++; i >= l { + return false + } + } + continue + + case '[': + if i++; i >= l { + // class didn't end + return false + } + if s[i] == '^' || s[i] == '!' { + i++ + } + if i >= l || s[i] == ']' { + // class didn't end or empty character class + return false + } + + for ; i < l; i++ { + if separator != '\\' && s[i] == '\\' { + i++ + } else if s[i] == ']' { + // looks good + continue VALIDATE + } + } + + // class didn't end + return false + + case '{': + altDepth++ + continue + + case '}': + if altDepth == 0 { + // alt end without a corresponding start + return false + } + altDepth-- + continue + } + } + + // valid as long as all alts are closed + return altDepth == 0 +} -- cgit v1.2.3-70-g09d2