diff options
Diffstat (limited to 'cli/internal/cacheitem')
| -rw-r--r-- | cli/internal/cacheitem/cacheitem.go | 76 | ||||
| -rw-r--r-- | cli/internal/cacheitem/create.go | 119 | ||||
| -rw-r--r-- | cli/internal/cacheitem/create_test.go | 205 | ||||
| -rw-r--r-- | cli/internal/cacheitem/create_unix_test.go | 20 | ||||
| -rw-r--r-- | cli/internal/cacheitem/create_windows_test.go | 14 | ||||
| -rw-r--r-- | cli/internal/cacheitem/filepath.go | 162 | ||||
| -rw-r--r-- | cli/internal/cacheitem/filepath_unix.go | 14 | ||||
| -rw-r--r-- | cli/internal/cacheitem/filepath_windows.go | 50 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore.go | 200 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore_directory.go | 144 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore_directory_test.go | 103 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore_regular.go | 46 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore_symlink.go | 180 | ||||
| -rw-r--r-- | cli/internal/cacheitem/restore_test.go | 1493 |
14 files changed, 2826 insertions, 0 deletions
diff --git a/cli/internal/cacheitem/cacheitem.go b/cli/internal/cacheitem/cacheitem.go new file mode 100644 index 0000000..2fb2c3b --- /dev/null +++ b/cli/internal/cacheitem/cacheitem.go @@ -0,0 +1,76 @@ +// Package cacheitem is an abstraction over the creation and restoration of a cache +package cacheitem + +import ( + "archive/tar" + "bufio" + "crypto/sha512" + "errors" + "io" + "os" + + "github.com/vercel/turbo/cli/internal/turbopath" +) + +var ( + errMissingSymlinkTarget = errors.New("symlink restoration is delayed") + errCycleDetected = errors.New("links in the cache are cyclic") + errTraversal = errors.New("tar attempts to write outside of directory") + errNameMalformed = errors.New("file name is malformed") + errNameWindowsUnsafe = errors.New("file name is not Windows-safe") + errUnsupportedFileType = errors.New("attempted to restore unsupported file type") +) + +// CacheItem is a `tar` utility with a little bit extra. +type CacheItem struct { + // Path is the location on disk for the CacheItem. + Path turbopath.AbsoluteSystemPath + // Anchor is the position on disk at which the CacheItem will be restored. + Anchor turbopath.AbsoluteSystemPath + + // For creation. + tw *tar.Writer + zw io.WriteCloser + fileBuffer *bufio.Writer + handle *os.File + compressed bool +} + +// Close any open pipes +func (ci *CacheItem) Close() error { + if ci.tw != nil { + if err := ci.tw.Close(); err != nil { + return err + } + } + + if ci.zw != nil { + if err := ci.zw.Close(); err != nil { + return err + } + } + + if ci.fileBuffer != nil { + if err := ci.fileBuffer.Flush(); err != nil { + return err + } + } + + if ci.handle != nil { + if err := ci.handle.Close(); err != nil { + return err + } + } + + return nil +} + +// GetSha returns the SHA-512 hash for the CacheItem. 
+func (ci *CacheItem) GetSha() ([]byte, error) { + sha := sha512.New() + if _, err := io.Copy(sha, ci.handle); err != nil { + return nil, err + } + + return sha.Sum(nil), nil +} diff --git a/cli/internal/cacheitem/create.go b/cli/internal/cacheitem/create.go new file mode 100644 index 0000000..ce5b1c8 --- /dev/null +++ b/cli/internal/cacheitem/create.go @@ -0,0 +1,119 @@ +package cacheitem + +import ( + "archive/tar" + "bufio" + "io" + "os" + "strings" + "time" + + "github.com/DataDog/zstd" + + "github.com/moby/sys/sequential" + "github.com/vercel/turbo/cli/internal/tarpatch" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +// Create makes a new CacheItem at the specified path. +func Create(path turbopath.AbsoluteSystemPath) (*CacheItem, error) { + handle, err := path.OpenFile(os.O_WRONLY|os.O_CREATE|os.O_TRUNC|os.O_APPEND, 0644) + if err != nil { + return nil, err + } + + cacheItem := &CacheItem{ + Path: path, + handle: handle, + compressed: strings.HasSuffix(path.ToString(), ".zst"), + } + + cacheItem.init() + return cacheItem, nil +} + +// init prepares the CacheItem for writing. +// Wires all the writers end-to-end: +// tar.Writer -> zstd.Writer -> fileBuffer -> file +// The zstd.Writer stage is only present when ci.compressed is set. +func (ci *CacheItem) init() { + // 1<<20 bytes = 1 MiB. (In Go `^` is XOR, so `2^20` would be a 22-byte buffer.) + fileBuffer := bufio.NewWriterSize(ci.handle, 1<<20) // Flush to disk in 1 MiB chunks. + + var tw *tar.Writer + if ci.compressed { + zw := zstd.NewWriter(fileBuffer) + tw = tar.NewWriter(zw) + ci.zw = zw + } else { + tw = tar.NewWriter(fileBuffer) + } + + ci.tw = tw + ci.fileBuffer = fileBuffer +} + +// AddFile adds a user-cached item to the tar. +func (ci *CacheItem) AddFile(fsAnchor turbopath.AbsoluteSystemPath, filePath turbopath.AnchoredSystemPath) error { + // Calculate the fully-qualified path to the file to read it. + sourcePath := filePath.RestoreAnchor(fsAnchor) + + // We grab the FileInfo which tar.FileInfoHeader accepts. 
+ fileInfo, lstatErr := sourcePath.Lstat() + if lstatErr != nil { + return lstatErr + } + + // Determine if we need to populate the additional link argument to tar.FileInfoHeader. + var link string + if fileInfo.Mode()&os.ModeSymlink != 0 { + linkTarget, readlinkErr := sourcePath.Readlink() + if readlinkErr != nil { + return readlinkErr + } + link = linkTarget + } + + // Normalize the path within the cache. + cacheDestinationName := filePath.ToUnixPath() + + // Generate the header. + // We do not use header generation from stdlib because it can throw an error. + header, headerErr := tarpatch.FileInfoHeader(cacheDestinationName, fileInfo, link) + if headerErr != nil { + return headerErr + } + + // Throw an error if trying to create a cache that contains a type we don't support. + if (header.Typeflag != tar.TypeReg) && (header.Typeflag != tar.TypeDir) && (header.Typeflag != tar.TypeSymlink) { + return errUnsupportedFileType + } + + // Consistent creation. + header.Uid = 0 + header.Gid = 0 + header.AccessTime = time.Unix(0, 0) + header.ModTime = time.Unix(0, 0) + header.ChangeTime = time.Unix(0, 0) + + // Always write the header. + if err := ci.tw.WriteHeader(header); err != nil { + return err + } + + // If there is a body to be written, do so. + if header.Typeflag == tar.TypeReg && header.Size > 0 { + // Windows has a distinct "sequential read" opening mode. + // We use a library that will switch to this mode for Windows. 
+ sourceFile, sourceErr := sequential.OpenFile(sourcePath.ToString(), os.O_RDONLY, 0777) + if sourceErr != nil { + return sourceErr + } + + if _, err := io.Copy(ci.tw, sourceFile); err != nil { + return err + } + + return sourceFile.Close() + } + + return nil +} diff --git a/cli/internal/cacheitem/create_test.go b/cli/internal/cacheitem/create_test.go new file mode 100644 index 0000000..97eeb01 --- /dev/null +++ b/cli/internal/cacheitem/create_test.go @@ -0,0 +1,205 @@ +package cacheitem + +import ( + "encoding/hex" + "io/fs" + "os" + "runtime" + "testing" + + "github.com/vercel/turbo/cli/internal/turbopath" + "gotest.tools/v3/assert" +) + +type createFileDefinition struct { + Path turbopath.AnchoredSystemPath + Linkname string + fs.FileMode +} + +func createEntry(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error { + t.Helper() + if fileDefinition.FileMode.IsDir() { + return createDir(t, anchor, fileDefinition) + } else if fileDefinition.FileMode&os.ModeSymlink != 0 { + return createSymlink(t, anchor, fileDefinition) + } else if fileDefinition.FileMode&os.ModeNamedPipe != 0 { + return createFifo(t, anchor, fileDefinition) + } else { + return createFile(t, anchor, fileDefinition) + } +} + +func createDir(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error { + t.Helper() + path := fileDefinition.Path.RestoreAnchor(anchor) + mkdirAllErr := path.MkdirAllMode(fileDefinition.FileMode & 0777) + assert.NilError(t, mkdirAllErr, "MkdirAll") + return mkdirAllErr +} +func createFile(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error { + t.Helper() + path := fileDefinition.Path.RestoreAnchor(anchor) + writeErr := path.WriteFile([]byte("file contents"), fileDefinition.FileMode&0777) + assert.NilError(t, writeErr, "WriteFile") + return writeErr +} +func createSymlink(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) 
error { + t.Helper() + path := fileDefinition.Path.RestoreAnchor(anchor) + symlinkErr := path.Symlink(fileDefinition.Linkname) + assert.NilError(t, symlinkErr, "Symlink") + lchmodErr := path.Lchmod(fileDefinition.FileMode & 0777) + assert.NilError(t, lchmodErr, "Lchmod") + return symlinkErr +} + +func TestCreate(t *testing.T) { + tests := []struct { + name string + files []createFileDefinition + wantDarwin string + wantUnix string + wantWindows string + wantErr error + }{ + { + name: "hello world", + files: []createFileDefinition{ + { + Path: turbopath.AnchoredSystemPath("hello world.txt"), + FileMode: 0 | 0644, + }, + }, + wantDarwin: "4f39f1cab23906f3b89f313392ef7c26f2586e1c15fa6b577cce640c4781d082817927b4875a5413bc23e1248f0b198218998d70e7336e8b1244542ba446ca07", + wantUnix: "4f39f1cab23906f3b89f313392ef7c26f2586e1c15fa6b577cce640c4781d082817927b4875a5413bc23e1248f0b198218998d70e7336e8b1244542ba446ca07", + wantWindows: "e304d1ba8c51209f97bd11dabf27ca06996b70a850db592343942c49480de47bcbb4b7131fb3dd4d7564021d3bc0e648919e4876572b46ac1da97fca92b009c5", + }, + { + name: "links", + files: []createFileDefinition{ + { + Path: turbopath.AnchoredSystemPath("one"), + Linkname: "two", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Path: turbopath.AnchoredSystemPath("two"), + Linkname: "three", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Path: turbopath.AnchoredSystemPath("three"), + Linkname: "real", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Path: turbopath.AnchoredSystemPath("real"), + FileMode: 0 | 0644, + }, + }, + wantDarwin: "07278fdf37db4b212352367f391377bd6bac8f361dd834ae5522d809539bcf3b34d046873c1b45876d7372251446bb12c32f9fa9824914c4a1a01f6d7a206702", + wantUnix: "07278fdf37db4b212352367f391377bd6bac8f361dd834ae5522d809539bcf3b34d046873c1b45876d7372251446bb12c32f9fa9824914c4a1a01f6d7a206702", + wantWindows: "d4dac527e40860ee1ba3fdf2b9b12a1eba385050cf4f5877558dd531f0ecf2a06952fd5f88b852ad99e010943ed7b7f1437b727796369524e85f0c06f25d62c9", + }, + { + 
name: "subdirectory", + files: []createFileDefinition{ + { + Path: turbopath.AnchoredSystemPath("parent"), + FileMode: 0 | os.ModeDir | 0755, + }, + { + Path: turbopath.AnchoredSystemPath("parent/child"), + FileMode: 0 | 0644, + }, + }, + wantDarwin: "b513eea231daa84245d1d23d99fc398ccf17166ca49754ffbdcc1a3269cd75b7ad176a9c7095ff2481f71dca9fc350189747035f13d53b3a864e4fe35165233f", + wantUnix: "b513eea231daa84245d1d23d99fc398ccf17166ca49754ffbdcc1a3269cd75b7ad176a9c7095ff2481f71dca9fc350189747035f13d53b3a864e4fe35165233f", + wantWindows: "a8c3cba54e4dc214d3b21c3fa284d4032fe317d2f88943159efd5d16f3551ab53fae5c92ebf8acdd1bdb85d1238510b7938772cb11a0daa1b72b5e0f2700b5c7", + }, + { + name: "symlink permissions", + files: []createFileDefinition{ + { + Path: turbopath.AnchoredSystemPath("one"), + Linkname: "two", + FileMode: 0 | os.ModeSymlink | 0644, + }, + }, + wantDarwin: "3ea9d8a4581a0c2ba77557c72447b240c5ac622edcdac570a0bf597c276c2917b4ea73e6c373bbac593a480e396845651fa4b51e049531ff5d44c0adb807c2d9", + wantUnix: "99d953cbe1c0d8545e6f8382208fcefe14bcbefe39872f7b6310da14ac195b9a1b04b6d7b4b56f01a27216176193344a92488f99e124fcd68693f313f7137a1c", + wantWindows: "a4b1dc5c296f8ac4c9124727c1d84d70f72872c7bb4ced6d83ee312889e822baf1eaa72f88e624fb1aac4339d0a1f766ede77eabd2e4524eb26e89f883dc479d", + }, + { + name: "unsupported types error", + files: []createFileDefinition{ + { + Path: turbopath.AnchoredSystemPath("fifo"), + FileMode: 0 | os.ModeNamedPipe | 0644, + }, + }, + wantErr: errUnsupportedFileType, + }, + } + for _, tt := range tests { + getTestFunc := func(compressed bool) func(t *testing.T) { + return func(t *testing.T) { + inputDir := turbopath.AbsoluteSystemPath(t.TempDir()) + archiveDir := turbopath.AbsoluteSystemPath(t.TempDir()) + var archivePath turbopath.AbsoluteSystemPath + if compressed { + archivePath = turbopath.AnchoredSystemPath("out.tar.zst").RestoreAnchor(archiveDir) + } else { + archivePath = turbopath.AnchoredSystemPath("out.tar").RestoreAnchor(archiveDir) 
+ } + + cacheItem, cacheCreateErr := Create(archivePath) + assert.NilError(t, cacheCreateErr, "Cache Create") + + for _, file := range tt.files { + createErr := createEntry(t, inputDir, file) + if createErr != nil { + assert.ErrorIs(t, createErr, tt.wantErr) + assert.NilError(t, cacheItem.Close(), "Close") + return + } + + addFileError := cacheItem.AddFile(inputDir, file.Path) + if addFileError != nil { + assert.ErrorIs(t, addFileError, tt.wantErr) + assert.NilError(t, cacheItem.Close(), "Close") + return + } + } + + assert.NilError(t, cacheItem.Close(), "Cache Close") + + // We only check for repeatability on compressed caches. + if compressed { + openedCacheItem, openedCacheItemErr := Open(archivePath) + assert.NilError(t, openedCacheItemErr, "Cache Open") + + // We actually only need to compare the generated SHA. + // That ensures we got the same output. (Effectively snapshots.) + // This must be called after `Close` because both `tar` and `gzip` have footers. + shaOne, shaOneErr := openedCacheItem.GetSha() + assert.NilError(t, shaOneErr, "GetSha") + snapshot := hex.EncodeToString(shaOne) + + switch runtime.GOOS { + case "darwin": + assert.Equal(t, snapshot, tt.wantDarwin, "Got expected hash.") + case "windows": + assert.Equal(t, snapshot, tt.wantWindows, "Got expected hash.") + default: + assert.Equal(t, snapshot, tt.wantUnix, "Got expected hash.") + } + assert.NilError(t, openedCacheItem.Close(), "Close") + } + } + } + t.Run(tt.name, getTestFunc(false)) + t.Run(tt.name+"zst", getTestFunc(true)) + } +} diff --git a/cli/internal/cacheitem/create_unix_test.go b/cli/internal/cacheitem/create_unix_test.go new file mode 100644 index 0000000..812d1eb --- /dev/null +++ b/cli/internal/cacheitem/create_unix_test.go @@ -0,0 +1,20 @@ +//go:build darwin || linux +// +build darwin linux + +package cacheitem + +import ( + "syscall" + "testing" + + "github.com/vercel/turbo/cli/internal/turbopath" + "gotest.tools/v3/assert" +) + +func createFifo(t *testing.T, anchor 
turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error { + t.Helper() + path := fileDefinition.Path.RestoreAnchor(anchor) + fifoErr := syscall.Mknod(path.ToString(), syscall.S_IFIFO|0666, 0) + assert.NilError(t, fifoErr, "FIFO") + return fifoErr +} diff --git a/cli/internal/cacheitem/create_windows_test.go b/cli/internal/cacheitem/create_windows_test.go new file mode 100644 index 0000000..2cbb8b9 --- /dev/null +++ b/cli/internal/cacheitem/create_windows_test.go @@ -0,0 +1,14 @@ +//go:build windows +// +build windows + +package cacheitem + +import ( + "testing" + + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func createFifo(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error { + return errUnsupportedFileType +} diff --git a/cli/internal/cacheitem/filepath.go b/cli/internal/cacheitem/filepath.go new file mode 100644 index 0000000..4fd1681 --- /dev/null +++ b/cli/internal/cacheitem/filepath.go @@ -0,0 +1,162 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cacheitem + +import "os" + +const _separator = os.PathSeparator + +// A lazybuf is a lazily constructed path buffer. +// It supports append, reading previously appended bytes, +// and retrieving the final string. It does not allocate a buffer +// to hold the output until that output diverges from s. 
+type lazybuf struct { + path string + buf []byte + w int + volAndPath string + volLen int +} + +func (b *lazybuf) index(i int) byte { + if b.buf != nil { + return b.buf[i] + } + return b.path[i] +} + +func (b *lazybuf) append(c byte) { + if b.buf == nil { + if b.w < len(b.path) && b.path[b.w] == c { + b.w++ + return + } + b.buf = make([]byte, len(b.path)) + copy(b.buf, b.path[:b.w]) + } + b.buf[b.w] = c + b.w++ +} + +func (b *lazybuf) string() string { + if b.buf == nil { + return b.volAndPath[:b.volLen+b.w] + } + return b.volAndPath[:b.volLen] + string(b.buf[:b.w]) +} + +// Clean is extracted from stdlib and removes `FromSlash` processing +// of the stdlib version. +// +// Clean returns the shortest path name equivalent to path +// by purely lexical processing. It applies the following rules +// iteratively until no further processing can be done: +// +// 1. Replace multiple Separator elements with a single one. +// 2. Eliminate each . path name element (the current directory). +// 3. Eliminate each inner .. path name element (the parent directory) +// along with the non-.. element that precedes it. +// 4. Eliminate .. elements that begin a rooted path: +// that is, replace "/.." by "/" at the beginning of a path, +// assuming Separator is '/'. +// +// The returned path ends in a slash only if it represents a root directory, +// such as "/" on Unix or `C:\` on Windows. +// +// Finally, any occurrences of slash are replaced by Separator. +// +// If the result of this process is an empty string, Clean +// returns the string ".". +// +// See also Rob Pike, “Lexical File Names in Plan 9 or +// Getting Dot-Dot Right,” +// https://9p.io/sys/doc/lexnames.html +func Clean(path string) string { + originalPath := path + volLen := volumeNameLen(path) + path = path[volLen:] + if path == "" { + if volLen > 1 && originalPath[1] != ':' { + // should be UNC + // ORIGINAL: return FromSlash(originalPath) + return originalPath + } + return originalPath + "." 
+ } + rooted := os.IsPathSeparator(path[0]) + + // Invariants: + // reading from path; r is index of next byte to process. + // writing to buf; w is index of next byte to write. + // dotdot is index in buf where .. must stop, either because + // it is the leading slash or it is a leading ../../.. prefix. + n := len(path) + out := lazybuf{path: path, volAndPath: originalPath, volLen: volLen} + r, dotdot := 0, 0 + if rooted { + out.append(_separator) + r, dotdot = 1, 1 + } + + for r < n { + switch { + case os.IsPathSeparator(path[r]): + // empty path element + r++ + case path[r] == '.' && r+1 == n: + // . element + r++ + case path[r] == '.' && os.IsPathSeparator(path[r+1]): + // ./ element + r++ + + for r < len(path) && os.IsPathSeparator(path[r]) { + r++ + } + if out.w == 0 && volumeNameLen(path[r:]) > 0 { + // When joining prefix "." and an absolute path on Windows, + // the prefix should not be removed. + out.append('.') + } + case path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])): + // .. element: remove to last separator + r += 2 + switch { + case out.w > dotdot: + // can backtrack + out.w-- + for out.w > dotdot && !os.IsPathSeparator(out.index(out.w)) { + out.w-- + } + case !rooted: + // cannot backtrack, but not rooted, so append .. element. + if out.w > 0 { + out.append(_separator) + } + out.append('.') + out.append('.') + dotdot = out.w + } + default: + // real path element. + // add slash if needed + if rooted && out.w != 1 || !rooted && out.w != 0 { + out.append(_separator) + } + // copy element + for ; r < n && !os.IsPathSeparator(path[r]); r++ { + out.append(path[r]) + } + } + } + + // Turn empty string into "." 
+ if out.w == 0 { + out.append('.') + } + + // ORIGINAL: return FromSlash(out.string()) + return out.string() +} diff --git a/cli/internal/cacheitem/filepath_unix.go b/cli/internal/cacheitem/filepath_unix.go new file mode 100644 index 0000000..d0f6786 --- /dev/null +++ b/cli/internal/cacheitem/filepath_unix.go @@ -0,0 +1,14 @@ +//go:build !windows +// +build !windows + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cacheitem + +// volumeNameLen returns length of the leading volume name on Windows. +// It returns 0 elsewhere. +func volumeNameLen(path string) int { + return 0 +} diff --git a/cli/internal/cacheitem/filepath_windows.go b/cli/internal/cacheitem/filepath_windows.go new file mode 100644 index 0000000..2c3b852 --- /dev/null +++ b/cli/internal/cacheitem/filepath_windows.go @@ -0,0 +1,50 @@ +//go:build windows +// +build windows + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cacheitem + +func isSlash(c uint8) bool { + return c == '\\' || c == '/' +} + +// volumeNameLen returns length of the leading volume name on Windows. +// It returns 0 elsewhere. +func volumeNameLen(path string) int { + if len(path) < 2 { + return 0 + } + // with drive letter + c := path[0] + if path[1] == ':' && ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { + return 2 + } + // is it UNC? https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx + if l := len(path); l >= 5 && isSlash(path[0]) && isSlash(path[1]) && + !isSlash(path[2]) && path[2] != '.' { + // first, leading `\\` and next shouldn't be `\`. its server name. + for n := 3; n < l-1; n++ { + // second, next '\' shouldn't be repeated. + if isSlash(path[n]) { + n++ + // third, following something characters. its share name. 
+ if !isSlash(path[n]) { + if path[n] == '.' { + break + } + for ; n < l; n++ { + if isSlash(path[n]) { + break + } + } + return n + } + break + } + } + } + return 0 +} diff --git a/cli/internal/cacheitem/restore.go b/cli/internal/cacheitem/restore.go new file mode 100644 index 0000000..347b996 --- /dev/null +++ b/cli/internal/cacheitem/restore.go @@ -0,0 +1,200 @@ +package cacheitem + +import ( + "archive/tar" + "errors" + "io" + "os" + "runtime" + "strings" + + "github.com/DataDog/zstd" + + "github.com/moby/sys/sequential" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +// Open returns an existing CacheItem at the specified path. +func Open(path turbopath.AbsoluteSystemPath) (*CacheItem, error) { + handle, err := sequential.OpenFile(path.ToString(), os.O_RDONLY, 0777) + if err != nil { + return nil, err + } + + return &CacheItem{ + Path: path, + handle: handle, + compressed: strings.HasSuffix(path.ToString(), ".zst"), + }, nil +} + +// Restore extracts a cache to a specified disk location. +// It returns the entries restored so far along with the first error encountered. +// An error from closing the zstd reader is reported only when nothing else failed. +func (ci *CacheItem) Restore(anchor turbopath.AbsoluteSystemPath) (restored []turbopath.AnchoredSystemPath, err error) { + var tr *tar.Reader + + // We're reading a tar, possibly wrapped in zstd. + if ci.compressed { + zr := zstd.NewReader(ci.handle) + + // The `Close` function for compression effectively just returns the singular + // error field on the decompressor instance. This is extremely unlikely to be + // set without triggering one of the numerous other errors, but we should still + // handle that possible edge case. + // + // The results are named so that this deferred close can still surface its + // error; assigning to a plain local here would happen after the return + // values were already evaluated and be lost. + defer func() { + if closeErr := zr.Close(); err == nil { + err = closeErr + } + }() + tr = tar.NewReader(zr) + } else { + tr = tar.NewReader(ci.handle) + } + + // On first attempt to restore it's possible that a link target doesn't exist. + // Save them and topsort them. 
+ var symlinks []*tar.Header + + restored = make([]turbopath.AnchoredSystemPath, 0) + + restorePointErr := anchor.MkdirAll(0755) + if restorePointErr != nil { + return nil, restorePointErr + } + + // We're going to make the following two assumptions here for "fast" path restoration: + // - All directories are enumerated in the `tar`. + // - The contents of the tar are enumerated depth-first. + // + // This allows us to avoid: + // - Attempts at recursive creation of directories. + // - Repetitive `lstat` on restore of a file. + // + // Violating these assumptions won't cause things to break but we're only going to maintain + // an `lstat` cache for the current tree. If you violate these assumptions and the current + // cache does not apply for your path, it will clobber and re-start from the common + // shared prefix. + dirCache := &cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{anchor}, + } + + for { + header, trErr := tr.Next() + if trErr == io.EOF { + // The end, time to restore any missing links. + symlinksRestored, symlinksErr := topologicallyRestoreSymlinks(dirCache, anchor, symlinks, tr) + restored = append(restored, symlinksRestored...) + if symlinksErr != nil { + return restored, symlinksErr + } + + break + } + if trErr != nil { + return restored, trErr + } + + // The reader will not advance until tr.Next is called. + // We can treat this as file metadata + body reader. + + // Attempt to place the file on disk. + file, restoreErr := restoreEntry(dirCache, anchor, header, tr) + if restoreErr != nil { + if errors.Is(restoreErr, errMissingSymlinkTarget) { + // Links get one shot to be valid, then they're accumulated, DAG'd, and restored on delay. + symlinks = append(symlinks, header) + continue + } + return restored, restoreErr + } + restored = append(restored, file) + } + + return restored, nil +} + +// restoreEntry is the entry point for all things read from the tar. 
+func restoreEntry(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header, reader *tar.Reader) (turbopath.AnchoredSystemPath, error) { + // We're permissive on creation, but restrictive on restoration. + // There is no need to prevent the cache creation in any case. + // And on restoration, if we fail, we simply run the task. + switch header.Typeflag { + case tar.TypeDir: + return restoreDirectory(dirCache, anchor, header) + case tar.TypeReg: + return restoreRegular(dirCache, anchor, header, reader) + case tar.TypeSymlink: + return restoreSymlink(dirCache, anchor, header) + default: + return "", errUnsupportedFileType + } +} + +// canonicalizeName returns either an AnchoredSystemPath or an error. +func canonicalizeName(name string) (turbopath.AnchoredSystemPath, error) { + // Assuming this was a `turbo`-created input, we currently have an AnchoredUnixPath. + // Assuming this is malicious input we don't really care if we do the wrong thing. + wellFormed, windowsSafe := checkName(name) + + // Determine if the future filename is a well-formed AnchoredUnixPath + if !wellFormed { + return "", errNameMalformed + } + + // Determine if the AnchoredUnixPath is safe to be used on Windows + if runtime.GOOS == "windows" && !windowsSafe { + return "", errNameWindowsUnsafe + } + + // Directories will have a trailing slash. Remove it. + noTrailingSlash := strings.TrimSuffix(name, "/") + + // Okay, we're all set here. + return turbopath.AnchoredUnixPathFromUpstream(noTrailingSlash).ToSystemPath(), nil +} + +// checkName returns `wellFormed, windowsSafe` via inspection of separators and traversal +func checkName(name string) (bool, bool) { + length := len(name) + + // Name is of length 0. + if length == 0 { + return false, false + } + + wellFormed := true + windowsSafe := true + + // Name is: + // - "." + // - ".." + if wellFormed && (name == "." 
|| name == "..") { + wellFormed = false + } + + // Name starts with: + // - `/` + // - `./` + // - `../` + if wellFormed && (strings.HasPrefix(name, "/") || strings.HasPrefix(name, "./") || strings.HasPrefix(name, "../")) { + wellFormed = false + } + + // Name ends in: + // - `/.` + // - `/..` + if wellFormed && (strings.HasSuffix(name, "/.") || strings.HasSuffix(name, "/..")) { + wellFormed = false + } + + // Name contains: + // - `//` + // - `/./` + // - `/../` + if wellFormed && (strings.Contains(name, "//") || strings.Contains(name, "/./") || strings.Contains(name, "/../")) { + wellFormed = false + } + + // Name contains: `\` + if strings.ContainsRune(name, '\\') { + windowsSafe = false + } + + return wellFormed, windowsSafe +} diff --git a/cli/internal/cacheitem/restore_directory.go b/cli/internal/cacheitem/restore_directory.go new file mode 100644 index 0000000..4704d66 --- /dev/null +++ b/cli/internal/cacheitem/restore_directory.go @@ -0,0 +1,144 @@ +package cacheitem + +import ( + "archive/tar" + "os" + "path/filepath" + "strings" + + "github.com/vercel/turbo/cli/internal/turbopath" +) + +// restoreDirectory restores a directory. +func restoreDirectory(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) { + processedName, err := canonicalizeName(header.Name) + if err != nil { + return "", err + } + + // We need to traverse `processedName` from base to root split at + // `os.Separator` to make sure we don't end up following a symlink + // outside of the restore path. + + // Create the directory. 
+ if err := safeMkdirAll(dirCache, anchor, processedName, header.Mode); err != nil { + return "", err + } + + return processedName, nil +} + +type cachedDirTree struct { + anchorAtDepth []turbopath.AbsoluteSystemPath + prefix []turbopath.RelativeSystemPath +} + +func (cr *cachedDirTree) getStartingPoint(path turbopath.AnchoredSystemPath) (turbopath.AbsoluteSystemPath, []turbopath.RelativeSystemPath) { + pathSegmentStrings := strings.Split(path.ToString(), string(os.PathSeparator)) + pathSegments := make([]turbopath.RelativeSystemPath, len(pathSegmentStrings)) + for index, pathSegmentString := range pathSegmentStrings { + pathSegments[index] = turbopath.RelativeSystemPathFromUpstream(pathSegmentString) + } + + i := 0 + for i = 0; i < len(cr.prefix) && i < len(pathSegments); i++ { + if pathSegments[i] != cr.prefix[i] { + break + } + } + + // 0: root anchor, can't remove it. + cr.anchorAtDepth = cr.anchorAtDepth[:i+1] + + // 0: first prefix. + cr.prefix = cr.prefix[:i] + + return cr.anchorAtDepth[i], pathSegments[i:] +} + +func (cr *cachedDirTree) Update(anchor turbopath.AbsoluteSystemPath, newSegment turbopath.RelativeSystemPath) { + cr.anchorAtDepth = append(cr.anchorAtDepth, anchor) + cr.prefix = append(cr.prefix, newSegment) +} + +// safeMkdirAll creates all directories, assuming that the leaf node is a directory. +// FIXME: Recheck the symlink cache before creating a directory. +func safeMkdirAll(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, mode int64) error { + // Iterate through path segments by os.Separator, appending them onto the anchor. + // Check to see if that path segment is a symlink with a target outside of anchor. + + // Pull the iteration starting point from the directory cache. 
+ calculatedAnchor, pathSegments := dirCache.getStartingPoint(processedName) + for _, segment := range pathSegments { + // Use a separate name for the result: redeclaring calculatedAnchor with ":=" + // would shadow the outer variable, so every segment would be checked + // against the starting point instead of the accumulated path. + nextAnchor, checkPathErr := checkPath(anchor, calculatedAnchor, segment) + // We hit an existing directory or absolute path that was invalid. + if checkPathErr != nil { + return checkPathErr + } + + // Otherwise we continue and check the next segment. + calculatedAnchor = nextAnchor + dirCache.Update(calculatedAnchor, segment) + } + + // If we have made it here we know that it is safe to call os.MkdirAll + // on the Join of anchor and processedName. + // + // This could _still_ error, but we don't care. + return processedName.RestoreAnchor(anchor).MkdirAll(os.FileMode(mode)) +} + +// checkPath validates a single path segment appended to accumulatedAnchor, +// inspecting the symlink target when the segment is an existing symlink. +// It makes sure to never traverse outside of the anchor. +func checkPath(originalAnchor turbopath.AbsoluteSystemPath, accumulatedAnchor turbopath.AbsoluteSystemPath, segment turbopath.RelativeSystemPath) (turbopath.AbsoluteSystemPath, error) { + // Check if the segment itself is sneakily an absolute path... + // (looking at you, Windows. CON, AUX...) + if filepath.IsAbs(segment.ToString()) { + return "", errTraversal + } + + // Find out if this portion of the path is a symlink. + combinedPath := accumulatedAnchor.Join(segment) + fileInfo, err := combinedPath.Lstat() + + // Getting an error here means we failed to stat the path. + // Assume that means we're safe and continue. + if err != nil { + return combinedPath, nil + } + + // Find out if we have a symlink. + isSymlink := fileInfo.Mode()&os.ModeSymlink != 0 + + // If we don't have a symlink it's safe. + if !isSymlink { + return combinedPath, nil + } + + // Check to see if the symlink targets outside of the originalAnchor. + // We don't do eval symlinks because we could find ourself in a totally + // different place. + + // 1. Get the target. + linkTarget, readLinkErr := combinedPath.Readlink() + if readLinkErr != nil { + return "", readLinkErr + } + + // 2. 
See if the target is absolute. + if filepath.IsAbs(linkTarget) { + absoluteLinkTarget := turbopath.AbsoluteSystemPathFromUpstream(linkTarget) + // The target has to sit under originalAnchor, the same direction of test + // that the relative branch below applies to computedTarget. + // NOTE(review): assumes HasPrefix reports whether the receiver starts with + // its argument; confirm against turbopath. + if absoluteLinkTarget.HasPrefix(originalAnchor) { + return absoluteLinkTarget, nil + } + return "", errTraversal + } + + // 3. Target is relative (or absolute Windows on a Unix device) + relativeLinkTarget := turbopath.RelativeSystemPathFromUpstream(linkTarget) + computedTarget := accumulatedAnchor.UntypedJoin(linkTarget) + if computedTarget.HasPrefix(originalAnchor) { + // Need to recurse and make sure the target doesn't link out. + return checkPath(originalAnchor, accumulatedAnchor, relativeLinkTarget) + } + return "", errTraversal +} diff --git a/cli/internal/cacheitem/restore_directory_test.go b/cli/internal/cacheitem/restore_directory_test.go new file mode 100644 index 0000000..f75bd47 --- /dev/null +++ b/cli/internal/cacheitem/restore_directory_test.go @@ -0,0 +1,103 @@ +package cacheitem + +import ( + "reflect" + "testing" + + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func Test_cachedDirTree_getStartingPoint(t *testing.T) { + testDir := turbopath.AbsoluteSystemPath("") + tests := []struct { + name string + + // STATE + cachedDirTree cachedDirTree + + // INPUT + path turbopath.AnchoredSystemPath + + // OUTPUT + calculatedAnchor turbopath.AbsoluteSystemPath + pathSegments []turbopath.RelativeSystemPath + }{ + { + name: "hello world", + cachedDirTree: cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{testDir}, + prefix: []turbopath.RelativeSystemPath{}, + }, + path: turbopath.AnchoredUnixPath("hello/world").ToSystemPath(), + calculatedAnchor: testDir, + pathSegments: []turbopath.RelativeSystemPath{"hello", "world"}, + }, + { + name: "has a cache", + cachedDirTree: cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{ + testDir, + testDir.UntypedJoin("hello"), + }, + prefix: []turbopath.RelativeSystemPath{"hello"}, + }, + path: turbopath.AnchoredUnixPath("hello/world").ToSystemPath(),
+ calculatedAnchor: testDir.UntypedJoin("hello"), + pathSegments: []turbopath.RelativeSystemPath{"world"}, + }, + { + name: "ask for yourself", + cachedDirTree: cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{ + testDir, + testDir.UntypedJoin("hello"), + testDir.UntypedJoin("hello", "world"), + }, + prefix: []turbopath.RelativeSystemPath{"hello", "world"}, + }, + path: turbopath.AnchoredUnixPath("hello/world").ToSystemPath(), + calculatedAnchor: testDir.UntypedJoin("hello", "world"), + pathSegments: []turbopath.RelativeSystemPath{}, + }, + { + name: "three layer cake", + cachedDirTree: cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{ + testDir, + testDir.UntypedJoin("hello"), + testDir.UntypedJoin("hello", "world"), + }, + prefix: []turbopath.RelativeSystemPath{"hello", "world"}, + }, + path: turbopath.AnchoredUnixPath("hello/world/again").ToSystemPath(), + calculatedAnchor: testDir.UntypedJoin("hello", "world"), + pathSegments: []turbopath.RelativeSystemPath{"again"}, + }, + { + name: "outside of cache hierarchy", + cachedDirTree: cachedDirTree{ + anchorAtDepth: []turbopath.AbsoluteSystemPath{ + testDir, + testDir.UntypedJoin("hello"), + testDir.UntypedJoin("hello", "world"), + }, + prefix: []turbopath.RelativeSystemPath{"hello", "world"}, + }, + path: turbopath.AnchoredUnixPath("somewhere/else").ToSystemPath(), + calculatedAnchor: testDir, + pathSegments: []turbopath.RelativeSystemPath{"somewhere", "else"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cr := tt.cachedDirTree + calculatedAnchor, pathSegments := cr.getStartingPoint(tt.path) + if !reflect.DeepEqual(calculatedAnchor, tt.calculatedAnchor) { + t.Errorf("cachedDirTree.getStartingPoint() calculatedAnchor = %v, want %v", calculatedAnchor, tt.calculatedAnchor) + } + if !reflect.DeepEqual(pathSegments, tt.pathSegments) { + t.Errorf("cachedDirTree.getStartingPoint() pathSegments = %v, want %v", pathSegments, tt.pathSegments) + } + }) + } +} diff 
--git a/cli/internal/cacheitem/restore_regular.go b/cli/internal/cacheitem/restore_regular.go new file mode 100644 index 0000000..ed8946e --- /dev/null +++ b/cli/internal/cacheitem/restore_regular.go @@ -0,0 +1,46 @@ +package cacheitem + +import ( + "archive/tar" + "io" + "os" + + "github.com/vercel/turbo/cli/internal/turbopath" +) + +// restoreRegular restores a regular file: it canonicalizes header.Name, makes sure the +// parent directories are safe to use, then writes the current tar entry from reader to +// that name under anchor. It returns the canonicalized name on success. +func restoreRegular(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header, reader *tar.Reader) (turbopath.AnchoredSystemPath, error) { + // Assuming this was a `turbo`-created input, we currently have an AnchoredUnixPath. + // Assuming this is malicious input we don't really care if we do the wrong thing. + processedName, err := canonicalizeName(header.Name) + if err != nil { + return "", err + } + + // We need to traverse `processedName` from base to root split at + // `os.Separator` to make sure we don't end up following a symlink + // outside of the restore path. + if err := safeMkdirFile(dirCache, anchor, processedName, header.Mode); err != nil { + return "", err + } + + // Create the file. + f, err := processedName.RestoreAnchor(anchor).OpenFile(os.O_WRONLY|os.O_TRUNC|os.O_CREATE, os.FileMode(header.Mode)) + if err != nil { + return "", err + } + if _, err := io.Copy(f, reader); err != nil { + // Close on the failure path too so the descriptor is not leaked; + // the copy error is the one worth reporting. + _ = f.Close() + return "", err + } + if err := f.Close(); err != nil { + return "", err + } + return processedName, nil +} + +// safeMkdirFile creates all parent directories, assuming that the leaf node is a file. +func safeMkdirFile(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, mode int64) error { + isRootFile := processedName.Dir() == "."
+ if !isRootFile { + return safeMkdirAll(dirCache, anchor, processedName.Dir(), 0755) + } + + return nil +} diff --git a/cli/internal/cacheitem/restore_symlink.go b/cli/internal/cacheitem/restore_symlink.go new file mode 100644 index 0000000..4cb29f5 --- /dev/null +++ b/cli/internal/cacheitem/restore_symlink.go @@ -0,0 +1,180 @@ +package cacheitem + +import ( + "archive/tar" + "io/fs" + "os" + "path/filepath" + + "github.com/pyr-sh/dag" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +// restoreSymlink restores a symlink and errors if the target is missing. +func restoreSymlink(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) { + processedName, canonicalizeNameErr := canonicalizeName(header.Name) + if canonicalizeNameErr != nil { + return "", canonicalizeNameErr + } + + // Check to see if the target exists. + processedLinkname := canonicalizeLinkname(anchor, processedName, header.Linkname) + if _, err := os.Lstat(processedLinkname); err != nil { + return "", errMissingSymlinkTarget + } + + return actuallyRestoreSymlink(dirCache, anchor, processedName, header) +} + +// restoreSymlinkMissingTarget restores a symlink and does not error if the target is missing. +func restoreSymlinkMissingTarget(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) { + processedName, canonicalizeNameErr := canonicalizeName(header.Name) + if canonicalizeNameErr != nil { + return "", canonicalizeNameErr + } + + return actuallyRestoreSymlink(dirCache, anchor, processedName, header) +} + +// actuallyRestoreSymlink does the on-disk work shared by both entry points above: +// it prepares the parent directories, removes whatever already sits at the link +// location, creates the symlink with header.Linkname as its target, and applies +// header.Mode through Lchmod. It returns processedName on success. +func actuallyRestoreSymlink(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) { + // We need to traverse `processedName` from base to root split at + // `os.Separator` to make sure we don't end up following a symlink + // outside of the restore path.
+ if err := safeMkdirFile(dirCache, anchor, processedName, header.Mode); err != nil { + return "", err + } + + // Specify where we are restoring this symlink. + symlinkFrom := processedName.RestoreAnchor(anchor) + + // Remove any existing object at that location. + // If it errors we'll catch it on creation. + _ = symlinkFrom.Remove() + + // Create the symlink. + // Explicitly uses the _original_ header.Linkname as the target. + // This does not support file names with `\` in them in a cross-platform manner. + symlinkErr := symlinkFrom.Symlink(header.Linkname) + if symlinkErr != nil { + return "", symlinkErr + } + + // Darwin allows you to change the permissions of a symlink. + lchmodErr := symlinkFrom.Lchmod(fs.FileMode(header.Mode)) + if lchmodErr != nil { + return "", lchmodErr + } + + return processedName, nil +} + +// topologicallyRestoreSymlinks ensures that targets of symlinks are created in advance +// of the things that link to them. It does this by topologically sorting all +// of the symlinks. This also enables us to ensure we do not create cycles.
+func topologicallyRestoreSymlinks(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, symlinks []*tar.Header, tr *tar.Reader) ([]turbopath.AnchoredSystemPath, error) { + restored := make([]turbopath.AnchoredSystemPath, 0) + // lookup maps the canonicalized location of each link back to its tar header. + lookup := make(map[string]*tar.Header) + + var g dag.AcyclicGraph + for _, header := range symlinks { + processedName, err := canonicalizeName(header.Name) + // Bail out before touching processedName: it is only meaningful when err is nil. + if err != nil { + return nil, err + } + processedSourcename := canonicalizeLinkname(anchor, processedName, processedName.ToString()) + processedLinkname := canonicalizeLinkname(anchor, processedName, header.Linkname) + // Both ends become vertices; the edge is built from the link's target to the link itself. + g.Add(processedSourcename) + g.Add(processedLinkname) + g.Connect(dag.BasicEdge(processedLinkname, processedSourcename)) + lookup[processedSourcename] = header + } + + // Refuse to restore anything when the links reference each other in a loop. + cycles := g.Cycles() + if cycles != nil { + return restored, errCycleDetected + } + + // Start the walk from every vertex that has no up-edges. + roots := make(dag.Set) + for _, v := range g.Vertices() { + if g.UpEdges(v).Len() == 0 { + roots.Add(v) + } + } + + walkFunc := func(vertex dag.Vertex, depth int) error { + key, ok := vertex.(string) + if !ok { + return nil + } + // Vertices without a header are plain link targets, not links we have to create. + header, exists := lookup[key] + if !exists { + return nil + } + + file, restoreErr := restoreSymlinkMissingTarget(dirCache, anchor, header) + if restoreErr != nil { + return restoreErr + } + + restored = append(restored, file) + return nil + } + + walkError := g.DepthFirstWalk(roots, walkFunc) + if walkError != nil { + return restored, walkError + } + + return restored, nil +} + +// canonicalizeLinkname determines (lexically) what the resolved path on the +// system will be when linkname is restored verbatim. +func canonicalizeLinkname(anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, linkname string) string { + // We don't know _anything_ about linkname.
It could be any of: + // + // - Absolute Unix Path + // - Absolute Windows Path + // - Relative Unix Path + // - Relative Windows Path + // + // We also can't _truly_ distinguish if the path is Unix or Windows. + // Take for example: `/Users/turbobot/weird-filenames/\foo\/lol` + // It is a valid file on Unix, but if we do slash conversion it breaks. + // Or `i\am\a\normal\unix\file\but\super\nested\on\windows`. + // + // We also can't safely assume that paths in link targets on one platform + // should be treated as targets for that platform. The author may be + // generating an artifact that should work on Windows on a Unix device. + // + // Given all of that, our best option is to restore link targets _verbatim_. + // No modification, no slash conversion. + // + // In order to DAG sort them, however, we do need to canonicalize them. + // We canonicalize them as if we're restoring them verbatim. + // + // 0. We've extracted a version of `Clean` from stdlib which does nothing but + // separator and traversal collapsing. + cleanedLinkname := Clean(linkname) + + // 1. Check to see if the link target is absolute _on the current platform_. + // If it is an absolute path it's canonical by rule. + if filepath.IsAbs(cleanedLinkname) { + return cleanedLinkname + } + + // Remaining options: + // - Absolute (other platform) Path + // - Relative Unix Path + // - Relative Windows Path + // + // At this point we simply assume that it's a relative path—no matter + // which separators appear in it and where they appear, We can't do + // anything else because the OS will also treat it like that when it is + // a link target. + // + // We manually join these to avoid calls to stdlib's `Clean`. 
+ source := processedName.RestoreAnchor(anchor) + canonicalized := source.Dir().ToString() + string(os.PathSeparator) + cleanedLinkname + return Clean(canonicalized) +} diff --git a/cli/internal/cacheitem/restore_test.go b/cli/internal/cacheitem/restore_test.go new file mode 100644 index 0000000..a0a33d6 --- /dev/null +++ b/cli/internal/cacheitem/restore_test.go @@ -0,0 +1,1493 @@ +package cacheitem + +import ( + "archive/tar" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "reflect" + "runtime" + "syscall" + "testing" + + "github.com/DataDog/zstd" + "github.com/vercel/turbo/cli/internal/turbopath" + "gotest.tools/v3/assert" +) + +type tarFile struct { + Body string + *tar.Header +} + +type restoreFile struct { + Name turbopath.AnchoredUnixPath + Linkname string + fs.FileMode +} + +// generateTar is used specifically to generate tar files that Turborepo would +// rarely or never encounter without malicious or pathological inputs. We use it +// to make sure that we respond well in these scenarios during restore attempts. 
+func generateTar(t *testing.T, files []tarFile) turbopath.AbsoluteSystemPath { + t.Helper() + testDir := turbopath.AbsoluteSystemPath(t.TempDir()) + testArchivePath := testDir.UntypedJoin("out.tar") + + handle, handleCreateErr := testArchivePath.Create() + assert.NilError(t, handleCreateErr, "os.Create") + + tw := tar.NewWriter(handle) + + for _, file := range files { + if file.Header.Typeflag == tar.TypeReg { + file.Header.Size = int64(len(file.Body)) + } + + writeHeaderErr := tw.WriteHeader(file.Header) + assert.NilError(t, writeHeaderErr, "tw.WriteHeader") + + _, writeErr := tw.Write([]byte(file.Body)) + assert.NilError(t, writeErr, "tw.Write") + } + + twCloseErr := tw.Close() + assert.NilError(t, twCloseErr, "tw.Close") + + handleCloseErr := handle.Close() + assert.NilError(t, handleCloseErr, "handle.Close") + + return testArchivePath +} + +// compressTar splits the compression of a tar file so that we don't +// accidentally diverge in tar creation while still being able to test +// restoration from tar and from .tar.zst. 
+func compressTar(t *testing.T, archivePath turbopath.AbsoluteSystemPath) turbopath.AbsoluteSystemPath { + t.Helper() + + inputHandle, inputHandleOpenErr := archivePath.Open() + assert.NilError(t, inputHandleOpenErr, "os.Open") + + outputPath := archivePath + ".zst" + outputHandle, outputHandleCreateErr := outputPath.Create() + assert.NilError(t, outputHandleCreateErr, "os.Create") + + zw := zstd.NewWriter(outputHandle) + _, copyError := io.Copy(zw, inputHandle) + assert.NilError(t, copyError, "io.Copy") + + zwCloseErr := zw.Close() + assert.NilError(t, zwCloseErr, "zw.Close") + + inputHandleCloseErr := inputHandle.Close() + assert.NilError(t, inputHandleCloseErr, "inputHandle.Close") + + outputHandleCloseErr := outputHandle.Close() + assert.NilError(t, outputHandleCloseErr, "outputHandle.Close") + + return outputPath +} + +func generateAnchor(t *testing.T) turbopath.AbsoluteSystemPath { + t.Helper() + testDir := turbopath.AbsoluteSystemPath(t.TempDir()) + anchorPoint := testDir.UntypedJoin("anchor") + + mkdirErr := anchorPoint.Mkdir(0777) + assert.NilError(t, mkdirErr, "Mkdir") + + return anchorPoint +} + +func assertFileExists(t *testing.T, anchor turbopath.AbsoluteSystemPath, diskFile restoreFile) { + t.Helper() + // If we have gotten here we can assume this to be true. + processedName := diskFile.Name.ToSystemPath() + fullName := processedName.RestoreAnchor(anchor) + fileInfo, err := fullName.Lstat() + assert.NilError(t, err, "Lstat") + + assert.Equal(t, fileInfo.Mode()&fs.ModePerm, diskFile.FileMode&fs.ModePerm, "File has the expected permissions: "+processedName) + assert.Equal(t, fileInfo.Mode()|fs.ModePerm, diskFile.FileMode|fs.ModePerm, "File has the expected mode.") + + if diskFile.FileMode&os.ModeSymlink != 0 { + linkname, err := fullName.Readlink() + assert.NilError(t, err, "Readlink") + + // We restore Linkname verbatim. 
+ assert.Equal(t, linkname, diskFile.Linkname, "Link target matches.") + } +} + +func TestOpen(t *testing.T) { + type wantErr struct { + unix error + windows error + } + type wantOutput struct { + unix []turbopath.AnchoredSystemPath + windows []turbopath.AnchoredSystemPath + } + type wantFiles struct { + unix []restoreFile + windows []restoreFile + } + tests := []struct { + name string + tarFiles []tarFile + wantOutput wantOutput + wantFiles wantFiles + wantErr wantErr + }{ + { + name: "cache optimized", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "one/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/file-one", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/file-two", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/a/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/a/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/b/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/b/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "one", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/three", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/three/file-one", + FileMode: 0644, + }, + { + Name: "one/two/three/file-two", + FileMode: 0644, + }, + { + Name: "one/two/a", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/a/file", + FileMode: 0644, + }, + { + Name: "one/two/b", + FileMode: 0 | os.ModeDir | 0755, + }, + { + 
Name: "one/two/b/file", + FileMode: 0644, + }, + }, + windows: []restoreFile{ + { + Name: "one", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/three", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/three/file-one", + FileMode: 0666, + }, + { + Name: "one/two/three/file-two", + FileMode: 0666, + }, + { + Name: "one/two/a", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/a/file", + FileMode: 0666, + }, + { + Name: "one/two/b", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/b/file", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{ + "one", + "one/two", + "one/two/three", + "one/two/three/file-one", + "one/two/three/file-two", + "one/two/a", + "one/two/a/file", + "one/two/b", + "one/two/b/file", + }.ToSystemPathArray(), + }, + }, + { + name: "pathological cache works", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "one/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/a/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/b/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/a/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/b/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/file-one", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/three/file-two", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "one", + FileMode: 0 | os.ModeDir | 0755, + }, + { + 
Name: "one/two", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/three", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/three/file-one", + FileMode: 0644, + }, + { + Name: "one/two/three/file-two", + FileMode: 0644, + }, + { + Name: "one/two/a", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/a/file", + FileMode: 0644, + }, + { + Name: "one/two/b", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "one/two/b/file", + FileMode: 0644, + }, + }, + windows: []restoreFile{ + { + Name: "one", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/three", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/three/file-one", + FileMode: 0666, + }, + { + Name: "one/two/three/file-two", + FileMode: 0666, + }, + { + Name: "one/two/a", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/a/file", + FileMode: 0666, + }, + { + Name: "one/two/b", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "one/two/b/file", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{ + "one", + "one/two", + "one/two/a", + "one/two/b", + "one/two/three", + "one/two/a/file", + "one/two/b/file", + "one/two/three/file-one", + "one/two/three/file-two", + }.ToSystemPathArray(), + }, + }, + { + name: "hello world", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "target", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "target", + }, + { + Header: &tar.Header{ + Name: "source", + Linkname: "target", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "source", + Linkname: "target", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Name: "target", + FileMode: 0644, + }, + }, + windows: []restoreFile{ + { + Name: "source", + Linkname: "target", + FileMode: 0 | os.ModeSymlink | 0666, + }, + { + Name: "target", + FileMode: 0666, + }, + }, + }, + wantOutput: 
wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"target", "source"}.ToSystemPathArray(), + }, + }, + { + name: "nested file", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "folder/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "folder/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "file", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "folder", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "folder/file", + FileMode: 0644, + }, + }, + windows: []restoreFile{ + { + Name: "folder", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "folder/file", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"folder", "folder/file"}.ToSystemPathArray(), + }, + }, + { + name: "nested symlink", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "folder/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "folder/symlink", + Linkname: "../", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "folder/symlink/folder-sibling", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "folder-sibling", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "folder", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "folder/symlink", + FileMode: 0 | os.ModeSymlink | 0777, + Linkname: "../", + }, + { + Name: "folder/symlink/folder-sibling", + FileMode: 0644, + }, + { + Name: "folder-sibling", + FileMode: 0644, + }, + }, + windows: []restoreFile{ + { + Name: "folder", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "folder/symlink", + FileMode: 0 | os.ModeSymlink | 0666, + Linkname: "..\\", + }, + { + Name: "folder/symlink/folder-sibling", + FileMode: 0666, + }, + { + Name: "folder-sibling", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"folder", "folder/symlink", 
"folder/symlink/folder-sibling"}.ToSystemPathArray(), + }, + }, + { + name: "pathological symlinks", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "one", + Linkname: "two", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "two", + Linkname: "three", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "three", + Linkname: "real", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "real", + Typeflag: tar.TypeReg, + Mode: 0755, + }, + Body: "real", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "one", + Linkname: "two", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Name: "two", + Linkname: "three", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Name: "three", + Linkname: "real", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Name: "real", + FileMode: 0 | 0755, + }, + }, + windows: []restoreFile{ + { + Name: "one", + Linkname: "two", + FileMode: 0 | os.ModeSymlink | 0666, + }, + { + Name: "two", + Linkname: "three", + FileMode: 0 | os.ModeSymlink | 0666, + }, + { + Name: "three", + Linkname: "real", + FileMode: 0 | os.ModeSymlink | 0666, + }, + { + Name: "real", + FileMode: 0 | 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"real", "three", "two", "one"}.ToSystemPathArray(), + }, + }, + { + name: "place file at dir location", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "folder-not-file/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "folder-not-file/subfile", + Typeflag: tar.TypeReg, + Mode: 0755, + }, + Body: "subfile", + }, + { + Header: &tar.Header{ + Name: "folder-not-file", + Typeflag: tar.TypeReg, + Mode: 0755, + }, + Body: "this shouldn't work", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "folder-not-file", + FileMode: 0 | os.ModeDir | 0755, + }, + { + Name: "folder-not-file/subfile", + 
FileMode: 0755, + }, + }, + windows: []restoreFile{ + { + Name: "folder-not-file", + FileMode: 0 | os.ModeDir | 0777, + }, + { + Name: "folder-not-file/subfile", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"folder-not-file", "folder-not-file/subfile"}.ToSystemPathArray(), + }, + wantErr: wantErr{ + unix: syscall.EISDIR, + windows: syscall.EISDIR, + }, + }, + // { + // name: "missing symlink with file at subdir", + // tarFiles: []tarFile{ + // { + // Header: &tar.Header{ + // Name: "one", + // Linkname: "two", + // Typeflag: tar.TypeSymlink, + // Mode: 0777, + // }, + // }, + // { + // Header: &tar.Header{ + // Name: "one/file", + // Typeflag: tar.TypeReg, + // Mode: 0755, + // }, + // Body: "file", + // }, + // }, + // wantFiles: wantFiles{ + // unix: []restoreFile{ + // { + // Name: "one", + // Linkname: "two", + // FileMode: 0 | os.ModeSymlink | 0777, + // }, + // }, + // }, + // wantOutput: wantOutput{ + // unix: turbopath.AnchoredUnixPathArray{"one"}.ToSystemPathArray(), + // windows: nil, + // }, + // wantErr: wantErr{ + // unix: os.ErrExist, + // windows: os.ErrExist, + // }, + // }, + { + name: "symlink cycle", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "one", + Linkname: "two", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "two", + Linkname: "three", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "three", + Linkname: "one", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{}, + }, + wantOutput: wantOutput{ + unix: []turbopath.AnchoredSystemPath{}, + }, + wantErr: wantErr{ + unix: errCycleDetected, + windows: errCycleDetected, + }, + }, + { + name: "symlink clobber", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "one", + Linkname: "two", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "one", + Linkname: 
"three", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "one", + Linkname: "real", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "real", + Typeflag: tar.TypeReg, + Mode: 0755, + }, + Body: "real", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "one", + Linkname: "real", + FileMode: 0 | os.ModeSymlink | 0777, + }, + { + Name: "real", + FileMode: 0755, + }, + }, + windows: []restoreFile{ + { + Name: "one", + Linkname: "real", + FileMode: 0 | os.ModeSymlink | 0666, + }, + { + Name: "real", + FileMode: 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"real", "one"}.ToSystemPathArray(), + }, + }, + { + name: "symlink traversal", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "escape", + Linkname: "../", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "escape/file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "file", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "escape", + Linkname: "../", + FileMode: 0 | os.ModeSymlink | 0777, + }, + }, + windows: []restoreFile{ + { + Name: "escape", + Linkname: "..\\", + FileMode: 0 | os.ModeSymlink | 0666, + }, + }, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"escape"}.ToSystemPathArray(), + }, + wantErr: wantErr{ + unix: errTraversal, + windows: errTraversal, + }, + }, + { + name: "Double indirection: file", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "up", + Linkname: "../", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "link", + Linkname: "up", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "link/outside-file", + Typeflag: tar.TypeReg, + Mode: 0755, + }, + }, + }, + wantErr: wantErr{unix: errTraversal, windows: errTraversal}, + wantOutput: wantOutput{ + unix: 
turbopath.AnchoredUnixPathArray{ + "up", + "link", + }.ToSystemPathArray(), + }, + }, + { + name: "Double indirection: folder", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "up", + Linkname: "../", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "link", + Linkname: "up", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "link/level-one/level-two/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + }, + wantErr: wantErr{unix: errTraversal, windows: errTraversal}, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{ + "up", + "link", + }.ToSystemPathArray(), + }, + }, + { + name: "name traversal", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "../escape", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "file", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{}, + }, + wantOutput: wantOutput{ + unix: []turbopath.AnchoredSystemPath{}, + }, + wantErr: wantErr{ + unix: errNameMalformed, + windows: errNameMalformed, + }, + }, + { + name: "windows unsafe", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "back\\slash\\file", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "file", + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{ + { + Name: "back\\slash\\file", + FileMode: 0644, + }, + }, + windows: []restoreFile{}, + }, + wantOutput: wantOutput{ + unix: turbopath.AnchoredUnixPathArray{"back\\slash\\file"}.ToSystemPathArray(), + windows: turbopath.AnchoredUnixPathArray{}.ToSystemPathArray(), + }, + wantErr: wantErr{ + unix: nil, + windows: errNameWindowsUnsafe, + }, + }, + { + name: "fifo (and others) unsupported", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "fifo", + Typeflag: tar.TypeFifo, + }, + }, + }, + wantFiles: wantFiles{ + unix: []restoreFile{}, + }, + wantOutput: wantOutput{ + unix: []turbopath.AnchoredSystemPath{}, + }, + wantErr: wantErr{ + unix: errUnsupportedFileType, + windows: 
errUnsupportedFileType, + }, + }, + } + for _, tt := range tests { + getTestFunc := func(compressed bool) func(t *testing.T) { + return func(t *testing.T) { + var archivePath turbopath.AbsoluteSystemPath + if compressed { + archivePath = compressTar(t, generateTar(t, tt.tarFiles)) + } else { + archivePath = generateTar(t, tt.tarFiles) + } + anchor := generateAnchor(t) + + cacheItem, err := Open(archivePath) + assert.NilError(t, err, "Open") + + restoreOutput, restoreErr := cacheItem.Restore(anchor) + var desiredErr error + if runtime.GOOS == "windows" { + desiredErr = tt.wantErr.windows + } else { + desiredErr = tt.wantErr.unix + } + if desiredErr != nil { + if !errors.Is(restoreErr, desiredErr) { + t.Errorf("wanted err: %v, got err: %v", tt.wantErr, restoreErr) + } + } else { + assert.NilError(t, restoreErr, "Restore") + } + + outputComparison := tt.wantOutput.unix + if runtime.GOOS == "windows" && tt.wantOutput.windows != nil { + outputComparison = tt.wantOutput.windows + } + + if !reflect.DeepEqual(restoreOutput, outputComparison) { + t.Errorf("Restore() = %v, want %v", restoreOutput, outputComparison) + } + + // Check files on disk. 
+ filesComparison := tt.wantFiles.unix + if runtime.GOOS == "windows" && tt.wantFiles.windows != nil { + filesComparison = tt.wantFiles.windows + } + for _, diskFile := range filesComparison { + assertFileExists(t, anchor, diskFile) + } + + assert.NilError(t, cacheItem.Close(), "Close") + } + } + t.Run(tt.name+"zst", getTestFunc(true)) + t.Run(tt.name, getTestFunc(false)) + } +} + +func Test_checkName(t *testing.T) { + tests := []struct { + path string + wellFormed bool + windowsSafe bool + }{ + // Empty + { + path: "", + wellFormed: false, + windowsSafe: false, + }, + // Bad prefix + { + path: ".", + wellFormed: false, + windowsSafe: true, + }, + { + path: "..", + wellFormed: false, + windowsSafe: true, + }, + { + path: "/", + wellFormed: false, + windowsSafe: true, + }, + { + path: "./", + wellFormed: false, + windowsSafe: true, + }, + { + path: "../", + wellFormed: false, + windowsSafe: true, + }, + // Bad prefix, suffixed + { + path: "/a", + wellFormed: false, + windowsSafe: true, + }, + { + path: "./a", + wellFormed: false, + windowsSafe: true, + }, + { + path: "../a", + wellFormed: false, + windowsSafe: true, + }, + // Bad Suffix + { + path: "/.", + wellFormed: false, + windowsSafe: true, + }, + { + path: "/..", + wellFormed: false, + windowsSafe: true, + }, + // Bad Suffix, with prefix + { + path: "a/.", + wellFormed: false, + windowsSafe: true, + }, + { + path: "a/..", + wellFormed: false, + windowsSafe: true, + }, + // Bad middle + { + path: "//", + wellFormed: false, + windowsSafe: true, + }, + { + path: "/./", + wellFormed: false, + windowsSafe: true, + }, + { + path: "/../", + wellFormed: false, + windowsSafe: true, + }, + // Bad middle, prefixed + { + path: "a//", + wellFormed: false, + windowsSafe: true, + }, + { + path: "a/./", + wellFormed: false, + windowsSafe: true, + }, + { + path: "a/../", + wellFormed: false, + windowsSafe: true, + }, + // Bad middle, suffixed + { + path: "//a", + wellFormed: false, + windowsSafe: true, + }, + { + path: "/./a", 
+ wellFormed: false, + windowsSafe: true, + }, + { + path: "/../a", + wellFormed: false, + windowsSafe: true, + }, + // Bad middle, wrapped + { + path: "a//a", + wellFormed: false, + windowsSafe: true, + }, + { + path: "a/./a", + wellFormed: false, + windowsSafe: true, + }, + { + path: "a/../a", + wellFormed: false, + windowsSafe: true, + }, + // False positive tests + { + path: "...", + wellFormed: true, + windowsSafe: true, + }, + { + path: ".../a", + wellFormed: true, + windowsSafe: true, + }, + { + path: "a/...", + wellFormed: true, + windowsSafe: true, + }, + { + path: "a/.../a", + wellFormed: true, + windowsSafe: true, + }, + { + path: ".../...", + wellFormed: true, + windowsSafe: true, + }, + } + for _, tt := range tests { + t.Run(fmt.Sprintf("Path: \"%v\"", tt.path), func(t *testing.T) { + wellFormed, windowsSafe := checkName(tt.path) + if wellFormed != tt.wellFormed || windowsSafe != tt.windowsSafe { + t.Errorf("\nwantOutput: checkName(\"%v\") wellFormed = %v, windowsSafe %v\ngot: checkName(\"%v\") wellFormed = %v, windowsSafe %v", tt.path, tt.wellFormed, tt.windowsSafe, tt.path, wellFormed, windowsSafe) + } + }) + } +} + +func Test_canonicalizeLinkname(t *testing.T) { + // We're lying that this thing is absolute, but that's not relevant for tests. 
+ anchor := turbopath.AbsoluteSystemPath(filepath.Join("path", "to", "anchor")) + + tests := []struct { + name string + processedName turbopath.AnchoredSystemPath + linkname string + canonicalUnix string + canonicalWindows string + }{ + { + name: "hello world", + processedName: turbopath.AnchoredSystemPath("source"), + linkname: "target", + canonicalUnix: "path/to/anchor/target", + canonicalWindows: "path\\to\\anchor\\target", + }, + { + name: "Unix path subdirectory traversal", + processedName: turbopath.AnchoredUnixPath("child/source").ToSystemPath(), + linkname: "../sibling/target", + canonicalUnix: "path/to/anchor/sibling/target", + canonicalWindows: "path\\to\\anchor\\sibling\\target", + }, + { + name: "Windows path subdirectory traversal", + processedName: turbopath.AnchoredUnixPath("child/source").ToSystemPath(), + linkname: "..\\sibling\\target", + canonicalUnix: "path/to/anchor/child/..\\sibling\\target", + canonicalWindows: "path\\to\\anchor\\sibling\\target", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + canonical := tt.canonicalUnix + if runtime.GOOS == "windows" { + canonical = tt.canonicalWindows + } + if got := canonicalizeLinkname(anchor, tt.processedName, tt.linkname); got != canonical { + t.Errorf("canonicalizeLinkname() = %v, want %v", got, canonical) + } + }) + } +} + +func Test_canonicalizeName(t *testing.T) { + tests := []struct { + name string + fileName string + want turbopath.AnchoredSystemPath + wantErr error + }{ + { + name: "hello world", + fileName: "test.txt", + want: "test.txt", + }, + { + name: "directory", + fileName: "something/", + want: "something", + }, + { + name: "malformed name", + fileName: "//", + want: "", + wantErr: errNameMalformed, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := canonicalizeName(tt.fileName) + if tt.wantErr != nil && !errors.Is(err, tt.wantErr) { + t.Errorf("canonicalizeName() error = %v, wantErr %v", err, tt.wantErr) + return + } 
+ if !reflect.DeepEqual(got, tt.want) { + t.Errorf("canonicalizeName() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCacheItem_Restore(t *testing.T) { + tests := []struct { + name string + tarFiles []tarFile + want []turbopath.AnchoredSystemPath + }{ + { + name: "duplicate restores", + tarFiles: []tarFile{ + { + Header: &tar.Header{ + Name: "target", + Typeflag: tar.TypeReg, + Mode: 0644, + }, + Body: "target", + }, + { + Header: &tar.Header{ + Name: "source", + Linkname: "target", + Typeflag: tar.TypeSymlink, + Mode: 0777, + }, + }, + { + Header: &tar.Header{ + Name: "one/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + { + Header: &tar.Header{ + Name: "one/two/", + Typeflag: tar.TypeDir, + Mode: 0755, + }, + }, + }, + want: turbopath.AnchoredUnixPathArray{"target", "source", "one", "one/two"}.ToSystemPathArray(), + }, + } + for _, tt := range tests { + getTestFunc := func(compressed bool) func(t *testing.T) { + return func(t *testing.T) { + var archivePath turbopath.AbsoluteSystemPath + if compressed { + archivePath = compressTar(t, generateTar(t, tt.tarFiles)) + } else { + archivePath = generateTar(t, tt.tarFiles) + } + anchor := generateAnchor(t) + + cacheItem, err := Open(archivePath) + assert.NilError(t, err, "Open") + + restoreOutput, restoreErr := cacheItem.Restore(anchor) + if !reflect.DeepEqual(restoreOutput, tt.want) { + t.Errorf("#1 CacheItem.Restore() = %v, want %v", restoreOutput, tt.want) + } + assert.NilError(t, restoreErr, "Restore #1") + assert.NilError(t, cacheItem.Close(), "Close") + + cacheItem2, err2 := Open(archivePath) + assert.NilError(t, err2, "Open") + + restoreOutput2, restoreErr2 := cacheItem2.Restore(anchor) + if !reflect.DeepEqual(restoreOutput2, tt.want) { + t.Errorf("#2 CacheItem.Restore() = %v, want %v", restoreOutput2, tt.want) + } + assert.NilError(t, restoreErr2, "Restore #2") + assert.NilError(t, cacheItem2.Close(), "Close") + } + } + t.Run(tt.name+"zst", getTestFunc(true)) + t.Run(tt.name, getTestFunc(false)) 
+ } +} |
