diff options
Diffstat (limited to 'cli/internal/encoding')
| -rw-r--r-- | cli/internal/encoding/gitoutput/gitoutput.go | 345 | ||||
| -rw-r--r-- | cli/internal/encoding/gitoutput/gitoutput_test.go | 377 | ||||
| -rw-r--r-- | cli/internal/encoding/gitoutput/validators.go | 148 | ||||
| -rw-r--r-- | cli/internal/encoding/gitoutput/validators_test.go | 514 |
4 files changed, 1384 insertions, 0 deletions
diff --git a/cli/internal/encoding/gitoutput/gitoutput.go b/cli/internal/encoding/gitoutput/gitoutput.go new file mode 100644 index 0000000..1c2ad4f --- /dev/null +++ b/cli/internal/encoding/gitoutput/gitoutput.go @@ -0,0 +1,345 @@ +// Package gitoutput reads the output of calls to `git`. +package gitoutput + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" +) + +// These list, in order, the fields that make up one record in the output of +// each supported `git` command. +var ( + LsTreeFields = []Field{ObjectMode, ObjectType, ObjectName, Path} + LsFilesFields = []Field{ObjectMode, ObjectName, ObjectStage, Path} + StatusFields = []Field{StatusX, StatusY, Path} +) + +// _lsTreeFieldToIndex maps each field of LsTreeFields to its index in an LsTreeEntry. +var _lsTreeFieldToIndex = map[Field]int{ + ObjectMode: 0, + ObjectType: 1, + ObjectName: 2, + Path: 3, +} + +// _lsFilesFieldToIndex maps each field of LsFilesFields to its index in an LsFilesEntry. +var _lsFilesFieldToIndex = map[Field]int{ + ObjectMode: 0, + ObjectName: 1, + ObjectStage: 2, + Path: 3, +} + +// _statusFieldToIndex maps each field of StatusFields to its index in a StatusEntry. +var _statusFieldToIndex = map[Field]int{ + StatusX: 0, + StatusY: 1, + Path: 2, +} + +// Field identifies one kind of field that appears in the output of `git` commands. +// Used for naming and sensible call sites. +type Field int + +// Field values start at 1, so the zero value of Field does not name any field. +const ( + // ObjectMode is the mode field from `git` outputs. e.g. 100644 + ObjectMode Field = iota + 1 + // ObjectType is the object type field from `git` outputs: blob, tree or commit + ObjectType + // ObjectName is the 40-character SHA hash + ObjectName + // ObjectStage is a value 0-3. + ObjectStage + // StatusX is the first character of the two-character output from `git status`. + StatusX + // StatusY is the second character of the two-character output from `git status`. + StatusY + // Path is the file path under version control in `git`. + Path +) + +// LsTreeEntry is a single record from the output of `git ls-tree`. +// GetField expects its values to be ordered as in LsTreeFields. +type LsTreeEntry []string + +// LsFilesEntry is a single record from the output of `git ls-files`. +// GetField expects its values to be ordered as in LsFilesFields. +type LsFilesEntry []string + +// StatusEntry is a single record from the output of `git status`. +// GetField expects its values to be ordered as in StatusFields. +type StatusEntry []string + +// GetField returns the value of the specified field. +// It panics if field is not one of LsTreeFields. 
+func (e LsTreeEntry) GetField(field Field) string { + value, exists := _lsTreeFieldToIndex[field] + if !exists { + panic("Received an invalid field for LsTreeEntry.") + } + return e[value] +} + +// GetField returns the value of the specified field. +// It panics if field is not one of LsFilesFields. +func (e LsFilesEntry) GetField(field Field) string { + value, exists := _lsFilesFieldToIndex[field] + if !exists { + panic("Received an invalid field for LsFilesEntry.") + } + return e[value] +} + +// GetField returns the value of the specified field. +// It panics if field is not one of StatusFields. +func (e StatusEntry) GetField(field Field) string { + value, exists := _statusFieldToIndex[field] + if !exists { + panic("Received an invalid field for StatusEntry.") + } + return e[value] +} + +// Separators that appear in the output of `git` commands. +const ( + _space = ' ' + _tab = '\t' + _nul = '\000' +) + +// A ParseError is returned for parsing errors. +// Entries and columns are both 1-indexed. +type ParseError struct { + Entry int // Entry where the error occurred + Column int // Column where the error occurred + Err error // The actual error +} + +// Error creates a string for a parse error. +func (e *ParseError) Error() string { + return fmt.Sprintf("parse error on entry %d, column %d: %v", e.Entry, e.Column, e.Err) +} + +// Unwrap returns the underlying error stored in Err. +func (e *ParseError) Unwrap() error { return e.Err } + +// These are the errors that can be returned in ParseError.Err. +var ( + ErrInvalidObjectMode = errors.New("object mode is not valid") + ErrInvalidObjectType = errors.New("object type is not valid") + ErrInvalidObjectName = errors.New("object name is not valid") + ErrInvalidObjectStage = errors.New("object stage is not valid") + ErrInvalidObjectStatusX = errors.New("object status x is not valid") + ErrInvalidObjectStatusY = errors.New("object status y is not valid") + ErrInvalidPath = errors.New("path is not valid") + ErrUnknownField = errors.New("unknown field") +) + +// A Reader reads NUL-terminated records from the output of `git` commands. 
+type Reader struct { + // ReuseRecord controls whether calls to Read may return a slice sharing + // the backing array of the previous call's returned slice for performance. + // By default, each call to Read returns newly allocated memory owned by the caller. + ReuseRecord bool + + // Fields specifies the type of each field, in the order the fields appear in an entry. + Fields []Field + + // reader is the buffered source of `git` output. + reader *bufio.Reader + + // numEntry is the 1-based number of the entry most recently read from the `git` output. + numEntry int + + // rawBuffer is an entry buffer only used by the readEntry method, + // for entries that do not fit in reader's internal buffer. + rawBuffer []byte + + // recordBuffer holds the validated bytes of the current record's fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int + + // fieldPositions is an index of field positions for the + // last record returned by Read. + fieldPositions []position + + // lastRecord is a record cache and only used when ReuseRecord == true. + lastRecord []string +} + +// NewLSTreeReader returns a new Reader that reads from reader +// and splits each entry into LsTreeFields. +func NewLSTreeReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: LsTreeFields, + } +} + +// NewLSFilesReader returns a new Reader that reads from reader +// and splits each entry into LsFilesFields. +func NewLSFilesReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: LsFilesFields, + } +} + +// NewStatusReader returns a new Reader that reads from reader +// and splits each entry into StatusFields. +func NewStatusReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: StatusFields, + } +} + +// Read reads one record (one NUL-terminated entry) from the underlying reader. +// Read always returns either a non-nil record or a non-nil error, +// but not both. +// +// If there is no data left to be read, Read returns nil, io.EOF. +// +// If ReuseRecord is true, the returned slice may be shared +// between multiple calls to Read. 
+func (r *Reader) Read() (record []string, err error) { + if r.ReuseRecord { + record, err = r.readRecord(r.lastRecord) + r.lastRecord = record + } else { + record, err = r.readRecord(nil) + } + return record, err +} + +// FieldPos returns the entry and column corresponding to +// the start of the field with the given index in the slice most recently +// returned by Read. Numbering of entries and columns starts at 1; +// columns are counted in bytes, not runes. +// +// If this is called with an out-of-bounds index, it panics. +func (r *Reader) FieldPos(field int) (entry int, column int) { + if field < 0 || field >= len(r.fieldPositions) { + panic("out of range index passed to FieldPos") + } + p := &r.fieldPositions[field] + return p.entry, p.col +} + +// position holds the entry number and the byte column at which a field starts. +type position struct { + entry, col int +} + +// ReadAll reads all the records from reader until EOF. +// ReadAll does not consult ReuseRecord: every record is newly allocated. +// +// A successful call returns err == nil, not err == io.EOF. Because ReadAll is +// defined to read until EOF, it does not treat end of file as an error to be +// reported. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.readRecord(nil) + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } +} + +// readEntry reads the next entry (with the trailing NUL). +// If EOF is hit without a trailing NUL, one is appended and the error is cleared. +// If some bytes were read then the error is never io.EOF. +// The result is only valid until the next call to readEntry. +func (r *Reader) readEntry() ([]byte, error) { + entry, err := r.reader.ReadSlice('\000') + if err == bufio.ErrBufferFull { + // The entry is longer than the bufio buffer: accumulate it in rawBuffer. + r.rawBuffer = append(r.rawBuffer[:0], entry...) + for err == bufio.ErrBufferFull { + entry, err = r.reader.ReadSlice('\000') + r.rawBuffer = append(r.rawBuffer, entry...) 
+ } + entry = r.rawBuffer + } + if len(entry) > 0 && err == io.EOF { + entry = append(entry, '\000') + err = nil + } + r.numEntry++ + + return entry, err +} + +// getFieldLength returns the field length and the separator length for advancing. +// The length is negative when the field's separator does not occur in entry. +func getFieldLength(fieldType Field, fieldNumber int, fieldCount int, entry *[]byte) (int, int) { + switch fieldType { + case StatusX: + // The two status characters are adjacent: nothing to skip after X. + return 1, 0 + case StatusY: + return 1, 1 + default: + return bytes.IndexRune(*entry, getSeparator(fieldNumber, fieldCount)), 1 + } +} + +// getSeparator returns the separator between the current field and the next field. +// Since fields separators are regular it doesn't hard code them. +func getSeparator(fieldNumber int, fieldCount int) rune { + remaining := fieldCount - fieldNumber + + switch remaining { + default: + return _space + case 2: + return _tab + case 1: + return _nul + } +} + +// ErrMissingSeparator is returned in ParseError.Err when an entry ends before +// the separator that should terminate one of its fields. +var ErrMissingSeparator = errors.New("field separator not found") + +// readRecord reads a single record into dst, reusing dst's backing array when +// it is large enough. It returns io.EOF once the input is exhausted, any other +// read error as is, and a *ParseError when a field is invalid or truncated. +func (r *Reader) readRecord(dst []string) ([]string, error) { + entry, errRead := r.readEntry() + if errRead != nil { + // Covers io.EOF as well as real read failures: an entry that was only + // partially read must be reported, not parsed. + return nil, errRead + } + + // Parse each field in the record. + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] + r.fieldPositions = r.fieldPositions[:0] + pos := position{entry: r.numEntry, col: 1} + + fieldCount := len(r.Fields) + + for fieldNumber, fieldType := range r.Fields { + length, advance := getFieldLength(fieldType, fieldNumber, fieldCount, &entry) + if length < 0 || length > len(entry) { + // Slicing entry with this length would panic; report the malformed entry instead. + return nil, &ParseError{ + Entry: pos.entry, + Column: pos.col, + Err: ErrMissingSeparator, + } + } + field := entry[:length] + + fieldError := checkValid(fieldType, field) + if fieldError != nil { + return nil, &ParseError{ + Entry: pos.entry, + Column: pos.col, + Err: fieldError, + } + } + + offset := length + advance + entry = entry[offset:] + r.recordBuffer = append(r.recordBuffer, field...) + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, pos) + pos.col += offset + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. 
+ str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx + } + + return dst, nil +} diff --git a/cli/internal/encoding/gitoutput/gitoutput_test.go b/cli/internal/encoding/gitoutput/gitoutput_test.go new file mode 100644 index 0000000..19ab056 --- /dev/null +++ b/cli/internal/encoding/gitoutput/gitoutput_test.go @@ -0,0 +1,377 @@ +package gitoutput + +import ( + "fmt" + "io" + "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type readTest struct { + Name string + Input string + Output [][]string + Reader func(io.Reader) *Reader + Positions [][][2]int + Errors []error + + // These fields are copied into the Reader + ReuseRecord bool +} + +// In these tests, the § and ∑ characters in readTest.Input are used to denote +// the start of a field and the position of an error respectively. +// They are removed before parsing and are used to verify the position +// information reported by FieldPos. 
+ +var lsTreeTests = []readTest{ + { + Name: "simple", + Input: "§100644 §blob §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t§package.json\000", + Output: [][]string{{"100644", "blob", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "package.json"}}, + Reader: NewLSTreeReader, + }, + { + Name: "no trailing nul", + Input: "§100644 §blob §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t§package.json", + Output: [][]string{{"100644", "blob", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "package.json"}}, + Reader: NewLSTreeReader, + }, + { + Name: "weird file names", + Input: "§100644 §blob §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t§\t\000§100644 §blob §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t§\"\000§100644 §blob §5b999efa470b056e329b4c23a73904e0794bdc2f\t§\n\000§100644 §blob §f44f57fff95196c5f7139dfa0b96875f1e9650a9\t§.gitignore\000§100644 §blob §33dbaf21275ca2a5f460249d941cbc27d5da3121\t§README.md\000§040000 §tree §7360f2d292aec95907cebdcbb412a6bf2bd10f8a\t§apps\000§100644 §blob §9ec2879b24ce2c817296eebe2cb3846f8e4751ea\t§package.json\000§040000 §tree §5759aadaea2cde55468a61e7104eb0a9d86c1d30\t§packages\000§100644 §blob §33d0621ee2f4da4a2f6f6bdd51a42618d181e337\t§turbo.json\000", + Output: [][]string{ + {"100644", "blob", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "\t"}, + {"100644", "blob", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "\""}, + {"100644", "blob", "5b999efa470b056e329b4c23a73904e0794bdc2f", "\n"}, + {"100644", "blob", "f44f57fff95196c5f7139dfa0b96875f1e9650a9", ".gitignore"}, + {"100644", "blob", "33dbaf21275ca2a5f460249d941cbc27d5da3121", "README.md"}, + {"040000", "tree", "7360f2d292aec95907cebdcbb412a6bf2bd10f8a", "apps"}, + {"100644", "blob", "9ec2879b24ce2c817296eebe2cb3846f8e4751ea", "package.json"}, + {"040000", "tree", "5759aadaea2cde55468a61e7104eb0a9d86c1d30", "packages"}, + {"100644", "blob", "33d0621ee2f4da4a2f6f6bdd51a42618d181e337", "turbo.json"}, + }, + Reader: NewLSTreeReader, + }, + { + Name: "invalid object mode", + Input: "∑888888 §blob 
§5b999efa470b056e329b4c23a73904e0794bdc2f\t§.eslintrc.js\000", + Output: [][]string{}, + Reader: NewLSTreeReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectMode}}, + }, + { + Name: "invalid object type", + Input: "§100644 ∑bush §5b999efa470b056e329b4c23a73904e0794bdc2f\t§.eslintrc.js\000", + Output: [][]string{}, + Reader: NewLSTreeReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectType}}, + }, + { + Name: "invalid object name", + Input: "§100644 §blob ∑Zb999efa470b056e329b4c23a73904e0794bdc2f\t§.eslintrc.js\000", + Output: [][]string{}, + Reader: NewLSTreeReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectName}}, + }, + { + Name: "invalid path", + Input: "§100644 §blob §5b999efa470b056e329b4c23a73904e0794bdc2f\t∑\000", + Output: [][]string{}, + Reader: NewLSTreeReader, + Errors: []error{&ParseError{Err: ErrInvalidPath}}, + }, +} + +var lsFilesTests = []readTest{ + { + Name: "simple", + Input: "§100644 §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 §0\t§package.json\000", + Output: [][]string{{"100644", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "0", "package.json"}}, + Reader: NewLSFilesReader, + }, + { + Name: "no trailing nul", + Input: "§100644 §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 §0\t§package.json", + Output: [][]string{{"100644", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "0", "package.json"}}, + Reader: NewLSFilesReader, + }, + { + Name: "invalid object mode", + Input: "∑888888 §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 §0\t§package.json", + Output: [][]string{}, + Reader: NewLSFilesReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectMode}}, + }, + { + Name: "invalid object name", + Input: "§100644 ∑Z69de29bb2d1d6434b8b29ae775ad8c2e48c5391 §0\t§package.json", + Output: [][]string{}, + Reader: NewLSFilesReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectName}}, + }, + { + Name: "invalid object stage", + Input: "§100644 §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 ∑4\t§package.json", + Output: [][]string{}, + Reader: 
NewLSFilesReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectStage}}, + }, + { + Name: "invalid path", + Input: "§100644 §e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 §0\t∑", + Output: [][]string{}, + Reader: NewLSFilesReader, + Errors: []error{&ParseError{Err: ErrInvalidPath}}, + }, +} + +var statusTests = []readTest{ + { + Name: "simple", + Input: "§A§D §package.json\000", + Output: [][]string{{"A", "D", "package.json"}}, + Reader: NewStatusReader, + }, + { + Name: "no trailing nul", + Input: "§A§D §package.json", + Output: [][]string{{"A", "D", "package.json"}}, + Reader: NewStatusReader, + }, + { + Name: "invalid status X", + Input: "∑~§D §package.json\000", + Output: [][]string{}, + Reader: NewStatusReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectStatusX}}, + }, + { + Name: "invalid status Y", + Input: "§D∑~ §package.json\000", + Output: [][]string{}, + Reader: NewStatusReader, + Errors: []error{&ParseError{Err: ErrInvalidObjectStatusY}}, + }, + { + Name: "invalid path", + Input: "§A§D ∑\000", + Output: [][]string{}, + Reader: NewStatusReader, + Errors: []error{&ParseError{Err: ErrInvalidPath}}, + }, +} + +func TestRead(t *testing.T) { + newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) { + positions, errPositions, input := makePositions(tt.Input) + r := tt.Reader(strings.NewReader(input)) + + r.ReuseRecord = tt.ReuseRecord + return r, positions, errPositions + } + + allTests := []readTest{} + allTests = append(allTests, lsTreeTests...) + allTests = append(allTests, lsFilesTests...) + allTests = append(allTests, statusTests...) 
+ + for _, tt := range allTests { + t.Run(tt.Name, func(t *testing.T) { + r, positions, errPositions := newReader(tt) + out, err := r.ReadAll() + if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil { + if !reflect.DeepEqual(err, wantErr) { + t.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr) + } + if out != nil { + t.Fatalf("ReadAll() output:\ngot %q\nwant nil", out) + } + } else { + if err != nil { + t.Fatalf("unexpected Readall() error: %v", err) + } + if !reflect.DeepEqual(out, tt.Output) { + t.Fatalf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + } + } + + // Check field and error positions. + r, _, _ = newReader(tt) + for recNum := 0; ; recNum++ { + rec, err := r.Read() + var wantErr error + if recNum < len(tt.Errors) && tt.Errors[recNum] != nil { + wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions) + } else if recNum >= len(tt.Output) { + wantErr = io.EOF + } + if !reflect.DeepEqual(err, wantErr) { + t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr) + } + if err != nil { + if recNum < len(tt.Output) { + t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output)) + } + break + } + if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) { + t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want) + } + pos := positions[recNum] + if len(pos) != len(rec) { + t.Fatalf("mismatched position length at record %d", recNum) + } + for i := range rec { + entry, col := r.FieldPos(i) + if got, want := [2]int{entry, col}, pos[i]; got != want { + t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want) + } + } + } + }) + } +} + +// firstError returns the first non-nil error in errs, +// with the position adjusted according to the error's +// index inside positions. 
+func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error { + for i, err := range errs { + if err != nil { + return errorWithPosition(err, i, positions, errPositions) + } + } + return nil +} + +func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error { + parseErr, ok := err.(*ParseError) + if !ok { + return err + } + if recNum >= len(positions) { + panic(fmt.Errorf("no positions found for error at record %d", recNum)) + } + errPos, ok := errPositions[recNum] + if !ok { + panic(fmt.Errorf("no error position found for error at record %d", recNum)) + } + parseErr1 := *parseErr + parseErr1.Entry = errPos[0] + parseErr1.Column = errPos[1] + return &parseErr1 +} + +// makePositions returns the expected field positions of all the fields in text, +// the positions of any errors, and the text with the position markers removed. +// +// The start of each field is marked with a § symbol; +// Error positions are marked with ∑ symbols. +func makePositions(text string) ([][][2]int, map[int][2]int, string) { + buf := make([]byte, 0, len(text)) + var positions [][][2]int + errPositions := make(map[int][2]int) + entry, col := 1, 1 + recNum := 0 + + for len(text) > 0 { + r, size := utf8.DecodeRuneInString(text) + switch r { + case '\000': + col = 1 + buf = append(buf, '\000') + positions = append(positions, [][2]int{}) + entry++ + recNum++ + case '§': + if len(positions) == 0 { + positions = append(positions, [][2]int{}) + } + positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{entry, col}) + case '∑': + errPositions[recNum] = [2]int{entry, col} + default: + buf = append(buf, text[:size]...) + col += size + } + text = text[size:] + } + return positions, errPositions, string(buf) +} + +// nTimes is an io.Reader which yields the string s n times. 
+type nTimes struct { + s string + n int + off int +} + +func (r *nTimes) Read(p []byte) (n int, err error) { + for { + if r.n <= 0 || r.s == "" { + return n, io.EOF + } + n0 := copy(p, r.s[r.off:]) + p = p[n0:] + n += n0 + r.off += n0 + if r.off == len(r.s) { + r.off = 0 + r.n-- + } + if len(p) == 0 { + return + } + } +} + +// TODO: track other types. +// benchmarkRead measures reading rows with the Reader returned by getReader. +// initReader, if non-nil, modifies the Reader before it's used. +func benchmarkRead(b *testing.B, getReader func(reader io.Reader) *Reader, initReader func(*Reader), rows string) { + b.ReportAllocs() + // The fixtures below are raw string literals, in which `\000` is four literal + // characters rather than a NUL byte. Turn each one into the real entry + // terminator so that every entry is read as its own record. + rows = strings.ReplaceAll(rows, `\000`, "\000") + r := getReader(&nTimes{s: rows, n: b.N}) + if initReader != nil { + initReader(r) + } + for { + _, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + b.Fatal(err) + } + } +} + +const benchmarkLSTreeData = `100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 \000100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 "\000100644 blob 5b999efa470b056e329b4c23a73904e0794bdc2f .eslintrc.js\000100644 blob f44f57fff95196c5f7139dfa0b96875f1e9650a9 .gitignore\000100644 blob 33dbaf21275ca2a5f460249d941cbc27d5da3121 README.md\000040000 tree 7360f2d292aec95907cebdcbb412a6bf2bd10f8a apps\000100644 blob 9ec2879b24ce2c817296eebe2cb3846f8e4751ea package.json\000040000 tree 5759aadaea2cde55468a61e7104eb0a9d86c1d30 packages\000100644 blob 33d0621ee2f4da4a2f6f6bdd51a42618d181e337 turbo.json\000` +const benchmarkLSFilesData = `100644 13e399637190f1edb7f034b4281ecfafb5dab9e2 0 Makefile\000100644 6c1c500409989499db51f1eff37b38b857547fdc 0 cmd/turbo/main.go\000100644 2d2b9a2c3ba82f6b806f58c7f7d5eb55fefa837e 0 cmd/turbo/main_utils.go\000100644 3329c8a7f6edee487caeeaf56c600f7c85fc69e7 0 cmd/turbo/signals.go\000100644 e81df7b6ed9a277c30dd35e3524d00e8b13cf584 0 cmd/turbo/version.go\000100644 8992ebf37df05fc5ff64c0f811a3259adff10d70 0 go.mod\000100644 3da872301c79986673d6a12914fbd48c924f5999 0 go.sum\000100644 d7b2d20a037aa9bf8b48eef451eb5f9ba5904237 0 
internal/analytics/analytics.go\000` +const benchmarkStatusData = ` M cli/internal/encoding/gitoutput/gitoutput.go\000 M cli/internal/encoding/gitoutput/gitoutput_test.go\000?? NOTICES.md\000 M cli/internal/encoding/gitoutput/gitoutput.go\000 M cli/internal/encoding/gitoutput/gitoutput_test.go\000?? NOTICES.md\000 M cli/internal/encoding/gitoutput/gitoutput.go\000 M cli/internal/encoding/gitoutput/gitoutput_test.go\000?? NOTICES.md\000 M cli/internal/encoding/gitoutput/gitoutput.go\000 M cli/internal/encoding/gitoutput/gitoutput_test.go\000?? NOTICES.md\000 M cli/internal/encoding/gitoutput/gitoutput.go\000 M cli/internal/encoding/gitoutput/gitoutput_test.go\000` + +func BenchmarkLSTreeRead(b *testing.B) { + benchmarkRead(b, NewLSTreeReader, nil, benchmarkLSTreeData) +} + +func BenchmarkLSTreeReadReuseRecord(b *testing.B) { + benchmarkRead(b, NewLSTreeReader, func(r *Reader) { r.ReuseRecord = true }, benchmarkLSTreeData) +} + +func BenchmarkLSFilesRead(b *testing.B) { + benchmarkRead(b, NewLSFilesReader, nil, benchmarkLSFilesData) +} + +func BenchmarkLSFilesReadReuseRecord(b *testing.B) { + benchmarkRead(b, NewLSFilesReader, func(r *Reader) { r.ReuseRecord = true }, benchmarkLSFilesData) +} + +func BenchmarkStatusRead(b *testing.B) { + benchmarkRead(b, NewStatusReader, nil, benchmarkStatusData) +} + +func BenchmarkStatusReadReuseRecord(b *testing.B) { + benchmarkRead(b, NewStatusReader, func(r *Reader) { r.ReuseRecord = true }, benchmarkStatusData) +} diff --git a/cli/internal/encoding/gitoutput/validators.go b/cli/internal/encoding/gitoutput/validators.go new file mode 100644 index 0000000..e13c2d5 --- /dev/null +++ b/cli/internal/encoding/gitoutput/validators.go @@ -0,0 +1,148 @@ +package gitoutput + +import "bytes" + +var _allowedObjectType = []byte(" blob tree commit ") +var _allowedStatusChars = []byte(" MTADRCU?!") + +// checkValid provides a uniform interface for calling `gitoutput` validators. 
+func checkValid(fieldType Field, value []byte) error { + switch fieldType { + case ObjectMode: + return checkObjectMode(value) + case ObjectType: + return checkObjectType(value) + case ObjectName: + return CheckObjectName(value) + case ObjectStage: + return checkObjectStage(value) + case StatusX: + return checkStatusX(value) + case StatusY: + return checkStatusY(value) + case Path: + return checkPath(value) + default: + return ErrUnknownField + } +} + +// checkObjectMode asserts that a byte slice is a six digit octal string (100644). +// It does not attempt to ensure that the values in particular positions are reasonable. +// It returns ErrInvalidObjectMode otherwise. +func checkObjectMode(value []byte) error { + if len(value) != 6 { + return ErrInvalidObjectMode + } + + // 0-7 are 0x30 - 0x37, so XOR with 0x30 maps exactly those bytes to 0-7 + // and every other byte to a value above 7. + for _, currentByte := range value { + if (currentByte ^ 0x30) > 7 { + return ErrInvalidObjectMode + } + } + + // length of 6, 0-7 + return nil +} + +// checkObjectType asserts that a byte slice is a valid possibility (blob, tree, commit). +// It returns ErrInvalidObjectType otherwise. +func checkObjectType(value []byte) error { + typeLength := len(value) + // Based upon: + // min(len("blob"), len("tree"), len("commit")) + // max(len("blob"), len("tree"), len("commit")) + if typeLength < 4 || typeLength > 6 { + return ErrInvalidObjectType + } + + // Because of the space separator there is no way to pass in a space. + // We use that trick to enable fast lookups in _allowedObjectType. + index := bytes.Index(_allowedObjectType, value) + + // Not found is -1; a match at 0 means value itself starts with a space. + // Neither is a valid type. + if index < 1 { + return ErrInvalidObjectType + } + + // Preceded by a space. + if _allowedObjectType[index-1] != byte(_space) { + return ErrInvalidObjectType + } + + // Followed by a space. + if _allowedObjectType[index+typeLength] != byte(_space) { + return ErrInvalidObjectType + } + return nil +} + +// CheckObjectName asserts that a byte slice looks like a SHA hash. 
+func CheckObjectName(value []byte) error { + if len(value) != 40 { + return ErrInvalidObjectName + } + + // 0-9 are 0x30 - 0x39 + // a-f are 0x61 - 0x66 + for _, currentByte := range value { + isNumber := (currentByte ^ 0x30) < 10 + numericAlpha := (currentByte ^ 0x60) + isAlpha := (numericAlpha < 7) && (numericAlpha > 0) + if !(isNumber || isAlpha) { + return ErrInvalidObjectName + } + } + + // length of 40, hex + return nil +} + +// checkObjectStage asserts that a byte slice is a valid possibility (0-3). +func checkObjectStage(value []byte) error { + // 0-3 are 0x30 - 0x33 + if len(value) != 1 { + return ErrInvalidObjectStage + } + + currentByte := value[0] + if (currentByte ^ 0x30) >= 4 { + return ErrInvalidObjectStage + } + + return nil +} + +// checkStatusX asserts that a byte slice is a valid possibility (" MTADRCU?!"). +func checkStatusX(value []byte) error { + if len(value) != 1 { + return ErrInvalidObjectStatusX + } + + index := bytes.Index(_allowedStatusChars, value) + if index == -1 { + return ErrInvalidObjectStatusX + } + return nil +} + +// checkStatusY asserts that a byte slice is a valid possibility (" MTADRCU?!"). +func checkStatusY(value []byte) error { + if len(value) != 1 { + return ErrInvalidObjectStatusY + } + + index := bytes.Index(_allowedStatusChars, value) + if index == -1 { + return ErrInvalidObjectStatusY + } + return nil +} + +// checkPath asserts that a byte slice is non-empty. +func checkPath(value []byte) error { + // Exists at all. This is best effort as trying to be fully-compatible is silly. 
+ if len(value) == 0 { + return ErrInvalidPath + } + return nil +}
diff --git a/cli/internal/encoding/gitoutput/validators_test.go b/cli/internal/encoding/gitoutput/validators_test.go
new file mode 100644
index 0000000..29e1274
--- /dev/null
+++ b/cli/internal/encoding/gitoutput/validators_test.go
@@ -0,0 +1,528 @@
+package gitoutput
+
+import (
+	"testing"
+)
+
+// Test_checkValid checks that checkValid accepts one well-formed sample value
+// for every known Field and returns an error for an unrecognized Field.
+func Test_checkValid(t *testing.T) {
+	type args struct {
+		fieldType Field
+		value     []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "ObjectMode",
+			args: args{
+				fieldType: ObjectMode,
+				value:     []byte("100644"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "ObjectType",
+			args: args{
+				fieldType: ObjectType,
+				value:     []byte("blob"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "ObjectName",
+			args: args{
+				fieldType: ObjectName,
+				value:     []byte("8992ebf37df05fc5ff64c0f811a3259adff10d70"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "ObjectStage",
+			args: args{
+				fieldType: ObjectStage,
+				value:     []byte("0"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "StatusX",
+			args: args{
+				fieldType: StatusX,
+				value:     []byte("!"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "StatusY",
+			args: args{
+				fieldType: StatusY,
+				value:     []byte("?"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Path",
+			args: args{
+				fieldType: Path,
+				value:     []byte("/hello/world"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Unknown",
+			args: args{
+				fieldType: Field(12),
+				value:     []byte("unused"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkValid(tt.args.fieldType, tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkValid() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// Test_checkObjectMode checks that checkObjectMode accepts six octal digits and
+// rejects non-octal bytes, NUL bytes, and a seven-character value.
+func Test_checkObjectMode(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Simple",
+			args: args{
+				value: []byte("100644"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "All sevens",
+			args: args{
+				value: []byte("777777"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "All zeroes",
+			args: args{
+				value: []byte("000000"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Non-octal chars",
+			args: args{
+				value: []byte("sixsix"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "nul",
+			args: args{
+				value: []byte("\000\000\000\000\000\000"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "too long",
+			args: args{
+				value: []byte("1234567"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "off by plus one",
+			args: args{
+				value: []byte("888888"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "off by minus one",
+			args: args{
+				value: []byte("//////"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkObjectMode(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkObjectMode() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// Test_checkObjectType checks that checkObjectType accepts exactly "blob",
+// "tree", or "commit" and rejects empty, combined, partial, and padded values.
+func Test_checkObjectType(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Finds blob",
+			args: args{
+				value: []byte("blob"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Finds tree",
+			args: args{
+				value: []byte("tree"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Finds commit",
+			args: args{
+				value: []byte("commit"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "nonsense input",
+			args: args{
+				value: []byte("input"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (all 3)",
+			args: args{
+				value: []byte("blob tree commit"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (first two)",
+			args: args{
+				value: []byte("blob tree"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (last two)",
+			args: args{
+				value: []byte("tree commit"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (arbitrary substring)",
+			args: args{
+				value: []byte("tree c"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (space)",
+			args: args{
+				value: []byte(" "),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (empty string)",
+			args: args{
+				value: []byte(""),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Knows too much about the implementation details (leading space)",
+			args: args{
+				value: []byte(" tree"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkObjectType(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkObjectType() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// TestCheckObjectName checks that CheckObjectName accepts a 40-character
+// lowercase hex string and rejects other lengths, uppercase, and non-hex bytes.
+func TestCheckObjectName(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Simple",
+			args: args{
+				value: []byte("8992ebf37df05fc5ff64c0f811a3259adff10d70"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Too short",
+			args: args{
+				value: []byte("8992ebf37df05fc5ff64"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Too long",
+			args: args{
+				value: []byte("8992ebf37df05fc5ff64c0f811a3259adff10d708992ebf37df05fc5ff64c0f811a3259adff10d70"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Not hex",
+			args: args{
+				value: []byte("z992ebf37df05fc5ff64c0f811a3259adff10d70"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Not lowercase",
+			args: args{
+				value: []byte("8992EBF37DF05FC5FF64C0F811A3259ADFF10D70"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Off by plus one in the ASCII table (a-f).",
+			args: args{
+				value: []byte("gggggggggggggggggggggggggggggggggggggggg"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Off by minus one in the ASCII table (a-f).",
+			args: args{
+				value: []byte("````````````````````````````````````````"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Off by minus one in the ASCII table (0-9).",
+			args: args{
+				value: []byte("////////////////////////////////////////"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Off by plus one in the ASCII table (0-9).",
+			args: args{
+				value: []byte("::::::::::::::::::::::::::::::::::::::::"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := CheckObjectName(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("CheckObjectName() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// Test_checkObjectStage checks that checkObjectStage accepts the single digits
+// "0" through "3" and rejects "/", "4", and the two-character value "00".
+func Test_checkObjectStage(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "0",
+			args: args{
+				value: []byte("0"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "1",
+			args: args{
+				value: []byte("1"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "2",
+			args: args{
+				value: []byte("2"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "3",
+			args: args{
+				value: []byte("3"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "/",
+			args: args{
+				value: []byte("/"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "4",
+			args: args{
+				value: []byte("4"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "00",
+			args: args{
+				value: []byte("00"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkObjectStage(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkObjectStage() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// Test_checkStatus runs every case against both checkStatusX and checkStatusY:
+// "D" and " " are accepted; empty, two-character, and NUL values are rejected.
+func Test_checkStatus(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Simple",
+			args: args{
+				value: []byte("D"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Space",
+			args: args{
+				value: []byte(" "),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Empty",
+			args: args{
+				value: []byte(""),
+			},
+			wantErr: true,
+		},
+		{
+			name: "Too long",
+			args: args{
+				value: []byte("?!"),
+			},
+			wantErr: true,
+		},
+		{
+			name: "nul",
+			args: args{
+				value: []byte("\000"),
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkStatusX(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkStatusX() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			if err := checkStatusY(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkStatusY() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// Test_checkPath checks that checkPath rejects the empty value and accepts
+// paths that contain, or consist solely of, a newline.
+func Test_checkPath(t *testing.T) {
+	type args struct {
+		value []byte
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Simple",
+			args: args{
+				value: []byte("./"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "contains newline",
+			args: args{
+				value: []byte("has\nnewline"),
+			},
+			wantErr: false,
+		},
+		{
+			name: "Empty",
+			args: args{
+				value: []byte(""),
+			},
+			wantErr: true,
+		},
+		{
+			name: "only newline",
+			args: args{
+				value: []byte("\n"),
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if err := checkPath(tt.args.value); (err != nil) != tt.wantErr {
+				t.Errorf("checkPath() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
|
