diff options
Diffstat (limited to 'cli/internal/encoding/gitoutput/gitoutput.go')
| -rw-r--r-- | cli/internal/encoding/gitoutput/gitoutput.go | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/cli/internal/encoding/gitoutput/gitoutput.go b/cli/internal/encoding/gitoutput/gitoutput.go new file mode 100644 index 0000000..1c2ad4f --- /dev/null +++ b/cli/internal/encoding/gitoutput/gitoutput.go @@ -0,0 +1,345 @@ +// Package gitoutput reads the output of calls to `git`. +package gitoutput + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" +) + +// These describe the structure of fields in the output of `git` commands. +var ( + LsTreeFields = []Field{ObjectMode, ObjectType, ObjectName, Path} + LsFilesFields = []Field{ObjectMode, ObjectName, ObjectStage, Path} + StatusFields = []Field{StatusX, StatusY, Path} +) + +var _lsTreeFieldToIndex = map[Field]int{ + ObjectMode: 0, + ObjectType: 1, + ObjectName: 2, + Path: 3, +} + +var _lsFilesFieldToIndex = map[Field]int{ + ObjectMode: 0, + ObjectName: 1, + ObjectStage: 2, + Path: 3, +} + +var _statusFieldToIndex = map[Field]int{ + StatusX: 0, + StatusY: 1, + Path: 2, +} + +// Field is the type for fields available in outputs to `git`. +// Used for naming and sensible call sites. +type Field int + +const ( + // ObjectMode is the mode field from `git` outputs. e.g. 100644 + ObjectMode Field = iota + 1 + // ObjectType is the set of allowed types from `git` outputs: blob, tree, commit + ObjectType + // ObjectName is the 40-character SHA hash + ObjectName + // ObjectStage is a value 0-3. + ObjectStage + // StatusX is the first character of the two-character output from `git status`. + StatusX + // StatusY is the second character of the two-character output from `git status`. + StatusY + // Path is the file path under version control in `git`. + Path +) + +// LsTreeEntry is the result from call `git ls-files` +type LsTreeEntry []string + +// LsFilesEntry is the result from call `git ls-tree` +type LsFilesEntry []string + +// StatusEntry is the result from call `git status` +type StatusEntry []string + +// GetField returns the value of the specified field. +func (e LsTreeEntry) GetField(field Field) string { + value, exists := _lsTreeFieldToIndex[field] + if !exists { + panic("Received an invalid field for LsTreeEntry.") + } + return e[value] +} + +// GetField returns the value of the specified field. +func (e LsFilesEntry) GetField(field Field) string { + value, exists := _lsFilesFieldToIndex[field] + if !exists { + panic("Received an invalid field for LsFilesEntry.") + } + return e[value] +} + +// GetField returns the value of the specified field. +func (e StatusEntry) GetField(field Field) string { + value, exists := _statusFieldToIndex[field] + if !exists { + panic("Received an invalid field for StatusEntry.") + } + return e[value] +} + +// Separators that appear in the output of `git` commands. +const ( + _space = ' ' + _tab = '\t' + _nul = '\000' +) + +// A ParseError is returned for parsing errors. +// Entries and columns are both 1-indexed. +type ParseError struct { + Entry int // Entry where the error occurred + Column int // Column where the error occurred + Err error // The actual error +} + +// Error creates a string for a parse error. +func (e *ParseError) Error() string { + return fmt.Sprintf("parse error on entry %d, column %d: %v", e.Entry, e.Column, e.Err) +} + +// Unwrap returns the raw error. +func (e *ParseError) Unwrap() error { return e.Err } + +// These are the errors that can be returned in ParseError.Err. +var ( + ErrInvalidObjectMode = errors.New("object mode is not valid") + ErrInvalidObjectType = errors.New("object type is not valid") + ErrInvalidObjectName = errors.New("object name is not valid") + ErrInvalidObjectStage = errors.New("object stage is not valid") + ErrInvalidObjectStatusX = errors.New("object status x is not valid") + ErrInvalidObjectStatusY = errors.New("object status y is not valid") + ErrInvalidPath = errors.New("path is not valid") + ErrUnknownField = errors.New("unknown field") +) + +// A Reader reads records from `git`'s output`. +type Reader struct { + // ReuseRecord controls whether calls to Read may return a slice sharing + // the backing array of the previous call's returned slice for performance. + // By default, each call to Read returns newly allocated memory owned by the caller. + ReuseRecord bool + + // Fields specifies the type of each field. + Fields []Field + + reader *bufio.Reader + + // numEntry is the current entry being read in the `git` output. + numEntry int + + // rawBuffer is an entry buffer only used by the readEntry method. + rawBuffer []byte + + // recordBuffer holds the unescaped fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int + + // fieldPositions is an index of field positions for the + // last record returned by Read. + fieldPositions []position + + // lastRecord is a record cache and only used when ReuseRecord == true. + lastRecord []string +} + +// NewLSTreeReader returns a new Reader that reads from reader. +func NewLSTreeReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: LsTreeFields, + } +} + +// NewLSFilesReader returns a new Reader that reads from reader. +func NewLSFilesReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: LsFilesFields, + } +} + +// NewStatusReader returns a new Reader that reads from reader. +func NewStatusReader(reader io.Reader) *Reader { + return &Reader{ + reader: bufio.NewReader(reader), + Fields: StatusFields, + } +} + +// Read reads one record from `reader`. +// Read always returns either a non-nil record or a non-nil error, +// but not both. +// +// If there is no data left to be read, Read returns nil, io.EOF. +// +// If ReuseRecord is true, the returned slice may be shared +// between multiple calls to Read. +func (r *Reader) Read() (record []string, err error) { + if r.ReuseRecord { + record, err = r.readRecord(r.lastRecord) + r.lastRecord = record + } else { + record, err = r.readRecord(nil) + } + return record, err +} + +// FieldPos returns the entry and column corresponding to +// the start of the field with the given index in the slice most recently +// returned by Read. Numbering of entries and columns starts at 1; +// columns are counted in bytes, not runes. +// +// If this is called with an out-of-bounds index, it panics. +func (r *Reader) FieldPos(field int) (entry int, column int) { + if field < 0 || field >= len(r.fieldPositions) { + panic("out of range index passed to FieldPos") + } + p := &r.fieldPositions[field] + return p.entry, p.col +} + +// pos holds the position of a field in the current entry. +type position struct { + entry, col int +} + +// ReadAll reads all the records from reader until EOF. +// +// A successful call returns err == nil, not err == io.EOF. Because ReadAll is +// defined to read until EOF, it does not treat end of file as an error to be +// reported. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.readRecord(nil) + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } +} + +// readEntry reads the next entry (with the trailing NUL). +// If EOF is hit without a trailing NUL, it will be omitted. +// If some bytes were read then the error is never io.EOF. +// The result is only valid until the next call to readEntry. +func (r *Reader) readEntry() ([]byte, error) { + entry, err := r.reader.ReadSlice('\000') + if err == bufio.ErrBufferFull { + r.rawBuffer = append(r.rawBuffer[:0], entry...) + for err == bufio.ErrBufferFull { + entry, err = r.reader.ReadSlice('\000') + r.rawBuffer = append(r.rawBuffer, entry...) + } + entry = r.rawBuffer + } + if len(entry) > 0 && err == io.EOF { + entry = append(entry, '\000') + err = nil + } + r.numEntry++ + + return entry, err +} + +// getFieldLength returns the field length and the separator length for advancing. +func getFieldLength(fieldType Field, fieldNumber int, fieldCount int, entry *[]byte) (int, int) { + switch fieldType { + case StatusX: + return 1, 0 + case StatusY: + return 1, 1 + default: + return bytes.IndexRune(*entry, getSeparator(fieldNumber, fieldCount)), 1 + } +} + +// getSeparator returns the separator between the current field and the next field. +// Since fields separators are regular it doesn't hard code them. +func getSeparator(fieldNumber int, fieldCount int) rune { + remaining := fieldCount - fieldNumber + + switch remaining { + default: + return _space + case 2: + return _tab + case 1: + return _nul + } +} + +// readRecord reads a single record. +func (r *Reader) readRecord(dst []string) ([]string, error) { + entry, errRead := r.readEntry() + if errRead == io.EOF { + return nil, errRead + } + + // Parse each field in the record. + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] + r.fieldPositions = r.fieldPositions[:0] + pos := position{entry: r.numEntry, col: 1} + + fieldCount := len(r.Fields) + + for fieldNumber, fieldType := range r.Fields { + length, advance := getFieldLength(fieldType, fieldNumber, fieldCount, &entry) + field := entry[:length] + + fieldError := checkValid(fieldType, field) + if fieldError != nil { + return nil, &ParseError{ + Entry: pos.entry, + Column: pos.col, + Err: fieldError, + } + } + + offset := length + advance + entry = entry[offset:] + r.recordBuffer = append(r.recordBuffer, field...) + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, pos) + pos.col += offset + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx + } + + return dst, nil +} |
