diff options
| author | 2023-04-28 01:36:44 +0800 | |
|---|---|---|
| committer | 2023-04-28 01:36:44 +0800 | |
| commit | dd84b9d64fb98746a230cd24233ff50a562c39c9 (patch) | |
| tree | b583261ef00b3afe72ec4d6dacb31e57779a6faf /cli/internal/doublestar/match.go | |
| parent | 0b46fcd72ac34382387b2bcf9095233efbcc52f4 (diff) | |
| download | HydroRoll-dd84b9d64fb98746a230cd24233ff50a562c39c9.tar.gz HydroRoll-dd84b9d64fb98746a230cd24233ff50a562c39c9.zip | |
Diffstat (limited to 'cli/internal/doublestar/match.go')
| -rw-r--r-- | cli/internal/doublestar/match.go | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/cli/internal/doublestar/match.go b/cli/internal/doublestar/match.go new file mode 100644 index 0000000..d8c9536 --- /dev/null +++ b/cli/internal/doublestar/match.go @@ -0,0 +1,377 @@ +// Package doublestar is adapted from https://github.com/bmatcuk/doublestar +// Copyright Bob Matcuk. All Rights Reserved. +// SPDX-License-Identifier: MIT +package doublestar + +import ( + "path/filepath" + "unicode/utf8" +) + +// Match reports whether name matches the shell pattern. +// The pattern syntax is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence of non-path-separators +// '/**/' matches zero or more directories +// '?' matches any single non-path-separator character +// '[' [ '^' '!' ] { character-range } ']' +// character class (must be non-empty) +// starting with `^` or `!` negates the class +// '{' { term } [ ',' { term } ... ] '}' +// alternatives +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// Match returns true if `name` matches the file name `pattern`. `name` and +// `pattern` are split on forward slash (`/`) characters and may be relative or +// absolute. +// +// Match requires pattern to match all of name, not just a substring. +// The only possible returned error is ErrBadPattern, when pattern +// is malformed. +// +// A doublestar (`**`) should appear surrounded by path separators such as +// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar +// option: a pattern such as `path/to/**.txt` would return the same results as +// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. +// +// Note: this is meant as a drop-in replacement for path.Match() which +// always uses '/' as the path separator. If you want to support systems +// which use a different path separator (such as Windows), what you want +// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both +// pattern and name and then use this function. +func Match(pattern, name string) (bool, error) { + return matchWithSeparator(pattern, name, '/', true) +} + +// PathMatch returns true if `name` matches the file name `pattern`. The +// difference between Match and PathMatch is that PathMatch will automatically +// use your system's path separator to split `name` and `pattern`. On systems +// where the path separator is `'\'`, escaping will be disabled. +// +// Note: this is meant as a drop-in replacement for filepath.Match(). It +// assumes that both `pattern` and `name` are using the system's path +// separator. If you can't be sure of that, use filepath.ToSlash() on both +// `pattern` and `name`, and then use the Match() function instead. +func PathMatch(pattern, name string) (bool, error) { + return matchWithSeparator(pattern, name, filepath.Separator, true) +} + +func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) { + doublestarPatternBacktrack := -1 + doublestarNameBacktrack := -1 + starPatternBacktrack := -1 + starNameBacktrack := -1 + patIdx := 0 + nameIdx := 0 + patLen := len(pattern) + nameLen := len(name) + startOfSegment := true +MATCH: + for nameIdx < nameLen { + if patIdx < patLen { + switch pattern[patIdx] { + case '*': + if patIdx++; patIdx < patLen && pattern[patIdx] == '*' { + // doublestar - must begin with a path separator, otherwise we'll + // treat it like a single star like bash + patIdx++ + if startOfSegment { + if patIdx >= patLen { + // pattern ends in `/**`: return true + return true, nil + } + + // doublestar must also end with a path separator, otherwise we're + // just going to treat the doublestar as a single star like bash + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + if patRune == separator { + patIdx += patRuneLen + + doublestarPatternBacktrack = patIdx + doublestarNameBacktrack = nameIdx + starPatternBacktrack = -1 + starNameBacktrack = -1 + continue + } + } + } + startOfSegment = false + + starPatternBacktrack = patIdx + starNameBacktrack = nameIdx + continue + + case '?': + startOfSegment = false + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if nameRune == separator { + // `?` cannot match the separator + break + } + + patIdx++ + nameIdx += nameRuneLen + continue + + case '[': + startOfSegment = false + if patIdx++; patIdx >= patLen { + // class didn't end + return false, ErrBadPattern + } + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + + matched := false + negate := pattern[patIdx] == '!' || pattern[patIdx] == '^' + if negate { + patIdx++ + } + + if patIdx >= patLen || pattern[patIdx] == ']' { + // class didn't end or empty character class + return false, ErrBadPattern + } + + last := utf8.MaxRune + for patIdx < patLen && pattern[patIdx] != ']' { + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + // match a range + if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' { + if pattern[patIdx] == '\\' { + // next character is escaped + patIdx++ + } + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + + if last <= nameRune && nameRune <= patRune { + matched = true + break + } + + // didn't match range - reset `last` + last = utf8.MaxRune + continue + } + + // not a range - check if the next rune is escaped + if patRune == '\\' { + patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) + patIdx += patRuneLen + } + + // check if the rune matches + if patRune == nameRune { + matched = true + break + } + + // no matches yet + last = patRune + } + + if matched == negate { + // failed to match - if we reached the end of the pattern, that means + // we never found a closing `]` + if patIdx >= patLen { + return false, ErrBadPattern + } + break + } + + closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true) + if closingIdx == -1 { + // no closing `]` + return false, ErrBadPattern + } + + patIdx += closingIdx + 1 + nameIdx += nameRuneLen + continue + + case '{': + // Note: removed 'startOfSegment = false' here. + // This block is guaranteed to return, so assigning it was useless + // and triggering a lint error + patIdx++ + closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\') + if closingIdx == -1 { + // no closing `}` + return false, ErrBadPattern + } + closingIdx += patIdx + + for { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } + commaIdx += patIdx + + result, err := matchWithSeparator(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name[nameIdx:], separator, validate) + if result || err != nil { + return result, err + } + + patIdx = commaIdx + 1 + } + return matchWithSeparator(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name[nameIdx:], separator, validate) + + case '\\': + if separator != '\\' { + // next rune is "escaped" in the pattern - literal match + if patIdx++; patIdx >= patLen { + // pattern ended + return false, ErrBadPattern + } + } + fallthrough + + default: + patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + if patRune != nameRune { + if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' { + // if this rune was meant to be escaped, we need to move patIdx + // back to the backslash before backtracking or validating below + patIdx-- + } + break + } + + patIdx += patRuneLen + nameIdx += nameRuneLen + startOfSegment = patRune == separator + continue + } + } + + if starPatternBacktrack >= 0 { + // `*` backtrack, but only if the `name` rune isn't the separator + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:]) + if nameRune != separator { + starNameBacktrack += nameRuneLen + patIdx = starPatternBacktrack + nameIdx = starNameBacktrack + startOfSegment = false + continue + } + } + + if doublestarPatternBacktrack >= 0 { + // `**` backtrack, advance `name` past next separator + nameIdx = doublestarNameBacktrack + for nameIdx < nameLen { + nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) + nameIdx += nameRuneLen + if nameRune == separator { + doublestarNameBacktrack = nameIdx + patIdx = doublestarPatternBacktrack + startOfSegment = true + continue MATCH + } + } + } + + if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) { + return false, ErrBadPattern + } + return false, nil + } + + if nameIdx < nameLen { + // we reached the end of `pattern` before the end of `name` + return false, nil + } + + // we've reached the end of `name`; we've successfully matched if we've also + // reached the end of `pattern`, or if the rest of `pattern` can match a + // zero-length string + return isZeroLengthPattern(pattern[patIdx:], separator) +} + +func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) { + // `/**` is a special case - a pattern such as `path/to/a/**` *should* match + // `path/to/a` because `a` might be a directory + if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" { + return true, nil + } + + if pattern[0] == '{' { + closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\') + if closingIdx == -1 { + // no closing '}' + return false, ErrBadPattern + } + closingIdx++ + + patIdx := 1 + for { + commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') + if commaIdx == -1 { + break + } + commaIdx += patIdx + + ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator) + if ret || err != nil { + return + } + + patIdx = commaIdx + 1 + } + return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator) + } + + // no luck - validate the rest of the pattern + if !doValidatePattern(pattern, separator) { + return false, ErrBadPattern + } + return false, nil +} + +// Finds the index of the first unescaped byte `c`, or negative 1. +func indexUnescapedByte(s string, c byte, allowEscaping bool) int { + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == c { + return i + } + } + return -1 +} + +// Assuming the byte before the beginning of `s` is an opening `{`, this +// function will find the index of the matching `}`. That is, it'll skip over +// any nested `{}` and account for escaping +func indexMatchedClosingAlt(s string, allowEscaping bool) int { + alts := 1 + l := len(s) + for i := 0; i < l; i++ { + if allowEscaping && s[i] == '\\' { + // skip next byte + i++ + } else if s[i] == '{' { + alts++ + } else if s[i] == '}' { + if alts--; alts == 0 { + return i + } + } + } + return -1 +} |
