aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/cli/internal/cacheitem
diff options
context:
space:
mode:
author简律纯 <hsiangnianian@outlook.com>2023-04-28 01:36:44 +0800
committer简律纯 <hsiangnianian@outlook.com>2023-04-28 01:36:44 +0800
commitdd84b9d64fb98746a230cd24233ff50a562c39c9 (patch)
treeb583261ef00b3afe72ec4d6dacb31e57779a6faf /cli/internal/cacheitem
parent0b46fcd72ac34382387b2bcf9095233efbcc52f4 (diff)
downloadHydroRoll-dd84b9d64fb98746a230cd24233ff50a562c39c9.tar.gz
HydroRoll-dd84b9d64fb98746a230cd24233ff50a562c39c9.zip
Diffstat (limited to 'cli/internal/cacheitem')
-rw-r--r--cli/internal/cacheitem/cacheitem.go76
-rw-r--r--cli/internal/cacheitem/create.go119
-rw-r--r--cli/internal/cacheitem/create_test.go205
-rw-r--r--cli/internal/cacheitem/create_unix_test.go20
-rw-r--r--cli/internal/cacheitem/create_windows_test.go14
-rw-r--r--cli/internal/cacheitem/filepath.go162
-rw-r--r--cli/internal/cacheitem/filepath_unix.go14
-rw-r--r--cli/internal/cacheitem/filepath_windows.go50
-rw-r--r--cli/internal/cacheitem/restore.go200
-rw-r--r--cli/internal/cacheitem/restore_directory.go144
-rw-r--r--cli/internal/cacheitem/restore_directory_test.go103
-rw-r--r--cli/internal/cacheitem/restore_regular.go46
-rw-r--r--cli/internal/cacheitem/restore_symlink.go180
-rw-r--r--cli/internal/cacheitem/restore_test.go1493
14 files changed, 2826 insertions, 0 deletions
diff --git a/cli/internal/cacheitem/cacheitem.go b/cli/internal/cacheitem/cacheitem.go
new file mode 100644
index 0000000..2fb2c3b
--- /dev/null
+++ b/cli/internal/cacheitem/cacheitem.go
@@ -0,0 +1,76 @@
+// Package cacheitem is an abstraction over the creation and restoration of a cache
+package cacheitem
+
+import (
+ "archive/tar"
+ "bufio"
+ "crypto/sha512"
+ "errors"
+ "io"
+ "os"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// Sentinel errors used while creating and restoring cache archives.
+// Callers compare against them with errors.Is.
+var (
+ // errMissingSymlinkTarget makes Restore defer a symlink until the end of the archive.
+ errMissingSymlinkTarget = errors.New("symlink restoration is delayed")
+ // NOTE(review): presumably returned by the delayed symlink restoration; not used in this file.
+ errCycleDetected = errors.New("links in the cache are cyclic")
+ // errTraversal is returned when a path segment or link target would leave the restore anchor.
+ errTraversal = errors.New("tar attempts to write outside of directory")
+ // errNameMalformed is returned for entry names that fail the well-formedness check.
+ errNameMalformed = errors.New("file name is malformed")
+ // errNameWindowsUnsafe is returned on Windows for entry names containing a backslash.
+ errNameWindowsUnsafe = errors.New("file name is not Windows-safe")
+ // errUnsupportedFileType is returned for anything other than regular files, directories, and symlinks.
+ errUnsupportedFileType = errors.New("attempted to restore unsupported file type")
+)
+
+// CacheItem is a `tar` utility with a little bit extra.
+//
+// A CacheItem is obtained from Create (for writing) or Open (for reading).
+// The writer fields below are only populated on the creation path.
+type CacheItem struct {
+ // Path is the location on disk for the CacheItem.
+ Path turbopath.AbsoluteSystemPath
+ // Anchor is the position on disk at which the CacheItem will be restored.
+ Anchor turbopath.AbsoluteSystemPath
+
+ // For creation.
+ // tw is the tar writer that entries are added to.
+ tw *tar.Writer
+ // zw is the compression writer; it is only set when the item is compressed.
+ zw io.WriteCloser
+ // fileBuffer buffers writes in front of handle.
+ fileBuffer *bufio.Writer
+ // handle is the open file backing the archive (used for both writing and reading).
+ handle *os.File
+ // compressed is true when Path ends in ".zst".
+ compressed bool
+}
+
+// Close any open pipes
+func (ci *CacheItem) Close() error {
+ if ci.tw != nil {
+ if err := ci.tw.Close(); err != nil {
+ return err
+ }
+ }
+
+ if ci.zw != nil {
+ if err := ci.zw.Close(); err != nil {
+ return err
+ }
+ }
+
+ if ci.fileBuffer != nil {
+ if err := ci.fileBuffer.Flush(); err != nil {
+ return err
+ }
+ }
+
+ if ci.handle != nil {
+ if err := ci.handle.Close(); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// GetSha returns the SHA-512 hash for the CacheItem.
+//
+// The digest covers whatever can still be read from the item's file handle
+// starting at its current offset; the handle is not rewound first.
+func (ci *CacheItem) GetSha() ([]byte, error) {
+	hasher := sha512.New()
+
+	_, copyErr := io.Copy(hasher, ci.handle)
+	if copyErr != nil {
+		return nil, copyErr
+	}
+
+	digest := hasher.Sum(nil)
+	return digest, nil
+}
diff --git a/cli/internal/cacheitem/create.go b/cli/internal/cacheitem/create.go
new file mode 100644
index 0000000..ce5b1c8
--- /dev/null
+++ b/cli/internal/cacheitem/create.go
@@ -0,0 +1,119 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "bufio"
+ "io"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/DataDog/zstd"
+
+ "github.com/moby/sys/sequential"
+ "github.com/vercel/turbo/cli/internal/tarpatch"
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// Create makes a new CacheItem at the specified path.
+func Create(path turbopath.AbsoluteSystemPath) (*CacheItem, error) {
+ handle, err := path.OpenFile(os.O_WRONLY|os.O_CREATE|os.O_TRUNC|os.O_APPEND, 0644)
+ if err != nil {
+ return nil, err
+ }
+
+ cacheItem := &CacheItem{
+ Path: path,
+ handle: handle,
+ compressed: strings.HasSuffix(path.ToString(), ".zst"),
+ }
+
+ cacheItem.init()
+ return cacheItem, nil
+}
+
+// init prepares the CacheItem for writing.
+// Wires all the writers end-to-end:
+// tar.Writer -> zstd.Writer -> fileBuffer -> file
+//
+// The zstd stage is only present when the item is compressed.
+func (ci *CacheItem) init() {
+	// Flush to disk in 1 MiB chunks.
+	// This must be a shift: in Go `^` is XOR, so `2^20` is 22, not 1048576.
+	const fileBufferSize = 1 << 20
+	fileBuffer := bufio.NewWriterSize(ci.handle, fileBufferSize)
+
+	var tw *tar.Writer
+	if ci.compressed {
+		zw := zstd.NewWriter(fileBuffer)
+		tw = tar.NewWriter(zw)
+		ci.zw = zw
+	} else {
+		tw = tar.NewWriter(fileBuffer)
+	}
+
+	ci.tw = tw
+	ci.fileBuffer = fileBuffer
+}
+
+// AddFile adds a user-cached item to the tar.
+//
+// fsAnchor is the directory that filePath is anchored at on disk. The entry
+// is stored in the archive under the Unix form of filePath. Only regular
+// files, directories, and symlinks are accepted; anything else yields
+// errUnsupportedFileType. Ownership and timestamps are zeroed so that the
+// same inputs produce the same archive.
+func (ci *CacheItem) AddFile(fsAnchor turbopath.AbsoluteSystemPath, filePath turbopath.AnchoredSystemPath) error {
+	// Calculate the fully-qualified path to the file to read it.
+	sourcePath := filePath.RestoreAnchor(fsAnchor)
+
+	// We grab the FileInfo which tar.FileInfoHeader accepts.
+	fileInfo, lstatErr := sourcePath.Lstat()
+	if lstatErr != nil {
+		return lstatErr
+	}
+
+	// Determine if we need to populate the additional link argument to tar.FileInfoHeader.
+	var link string
+	if fileInfo.Mode()&os.ModeSymlink != 0 {
+		linkTarget, readlinkErr := sourcePath.Readlink()
+		if readlinkErr != nil {
+			return readlinkErr
+		}
+		link = linkTarget
+	}
+
+	// Normalize the path within the cache.
+	cacheDestinationName := filePath.ToUnixPath()
+
+	// Generate the header.
+	// We do not use header generation from stdlib because it can throw an error.
+	header, headerErr := tarpatch.FileInfoHeader(cacheDestinationName, fileInfo, link)
+	if headerErr != nil {
+		return headerErr
+	}
+
+	// Throw an error if trying to create a cache that contains a type we don't support.
+	if (header.Typeflag != tar.TypeReg) && (header.Typeflag != tar.TypeDir) && (header.Typeflag != tar.TypeSymlink) {
+		return errUnsupportedFileType
+	}
+
+	// Consistent creation.
+	header.Uid = 0
+	header.Gid = 0
+	header.AccessTime = time.Unix(0, 0)
+	header.ModTime = time.Unix(0, 0)
+	header.ChangeTime = time.Unix(0, 0)
+
+	// Always write the header.
+	if err := ci.tw.WriteHeader(header); err != nil {
+		return err
+	}
+
+	// If there is a body to be written, do so.
+	if header.Typeflag == tar.TypeReg && header.Size > 0 {
+		// Windows has a distinct "sequential read" opening mode.
+		// We use a library that will switch to this mode for Windows.
+		sourceFile, sourceErr := sequential.OpenFile(sourcePath.ToString(), os.O_RDONLY, 0777)
+		if sourceErr != nil {
+			return sourceErr
+		}
+
+		// Close the source whether or not the copy succeeded so a failed
+		// copy does not leak the descriptor. The copy error takes precedence.
+		_, copyErr := io.Copy(ci.tw, sourceFile)
+		closeErr := sourceFile.Close()
+		if copyErr != nil {
+			return copyErr
+		}
+		return closeErr
+	}
+
+	return nil
+}
diff --git a/cli/internal/cacheitem/create_test.go b/cli/internal/cacheitem/create_test.go
new file mode 100644
index 0000000..97eeb01
--- /dev/null
+++ b/cli/internal/cacheitem/create_test.go
@@ -0,0 +1,205 @@
+package cacheitem
+
+import (
+ "encoding/hex"
+ "io/fs"
+ "os"
+ "runtime"
+ "testing"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+ "gotest.tools/v3/assert"
+)
+
+// createFileDefinition describes one filesystem entry that a test creates
+// on disk before adding it to a cache archive.
+type createFileDefinition struct {
+ // Path is where the entry is created, relative to the test's input directory.
+ Path turbopath.AnchoredSystemPath
+ // Linkname is the symlink target; it is only read for symlink entries.
+ Linkname string
+ // FileMode carries both the entry type bits and the permission bits.
+ fs.FileMode
+}
+
+// createEntry creates the entry described by fileDefinition under anchor,
+// dispatching on the type bits of its FileMode: directory, symlink, named
+// pipe, or (by default) a regular file.
+func createEntry(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+	t.Helper()
+
+	mode := fileDefinition.FileMode
+	switch {
+	case mode.IsDir():
+		return createDir(t, anchor, fileDefinition)
+	case mode&os.ModeSymlink != 0:
+		return createSymlink(t, anchor, fileDefinition)
+	case mode&os.ModeNamedPipe != 0:
+		return createFifo(t, anchor, fileDefinition)
+	default:
+		return createFile(t, anchor, fileDefinition)
+	}
+}
+
+// createDir creates the directory (and any missing parents) described by
+// fileDefinition under anchor, using only the permission bits of its mode.
+func createDir(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+	t.Helper()
+
+	target := fileDefinition.Path.RestoreAnchor(anchor)
+	permissions := fileDefinition.FileMode & 0777
+
+	err := target.MkdirAllMode(permissions)
+	assert.NilError(t, err, "MkdirAll")
+	return err
+}
+// createFile writes a regular file with fixed contents at the location
+// described by fileDefinition, using only the permission bits of its mode.
+func createFile(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+	t.Helper()
+
+	target := fileDefinition.Path.RestoreAnchor(anchor)
+	permissions := fileDefinition.FileMode & 0777
+
+	err := target.WriteFile([]byte("file contents"), permissions)
+	assert.NilError(t, err, "WriteFile")
+	return err
+}
+// createSymlink creates a symlink pointing at fileDefinition.Linkname and
+// then applies the permission bits to the link itself. Both steps are
+// asserted; the symlink creation error is what gets returned.
+func createSymlink(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+	t.Helper()
+
+	target := fileDefinition.Path.RestoreAnchor(anchor)
+
+	linkErr := target.Symlink(fileDefinition.Linkname)
+	assert.NilError(t, linkErr, "Symlink")
+
+	permissions := fileDefinition.FileMode & 0777
+	chmodErr := target.Lchmod(permissions)
+	assert.NilError(t, chmodErr, "Lchmod")
+
+	return linkErr
+}
+
+// TestCreate builds a small tree on disk for each case, adds every entry to
+// a new cache archive, and (for the compressed variant) compares the SHA-512
+// of the resulting archive against a per-platform snapshot.
+// Each case runs twice: once as a plain tar and once as a zstd-compressed tar.
+func TestCreate(t *testing.T) {
+ tests := []struct {
+ name string
+ files []createFileDefinition
+ wantDarwin string
+ wantUnix string
+ wantWindows string
+ wantErr error
+ }{
+ {
+ name: "hello world",
+ files: []createFileDefinition{
+ {
+ Path: turbopath.AnchoredSystemPath("hello world.txt"),
+ FileMode: 0 | 0644,
+ },
+ },
+ wantDarwin: "4f39f1cab23906f3b89f313392ef7c26f2586e1c15fa6b577cce640c4781d082817927b4875a5413bc23e1248f0b198218998d70e7336e8b1244542ba446ca07",
+ wantUnix: "4f39f1cab23906f3b89f313392ef7c26f2586e1c15fa6b577cce640c4781d082817927b4875a5413bc23e1248f0b198218998d70e7336e8b1244542ba446ca07",
+ wantWindows: "e304d1ba8c51209f97bd11dabf27ca06996b70a850db592343942c49480de47bcbb4b7131fb3dd4d7564021d3bc0e648919e4876572b46ac1da97fca92b009c5",
+ },
+ {
+ name: "links",
+ files: []createFileDefinition{
+ {
+ Path: turbopath.AnchoredSystemPath("one"),
+ Linkname: "two",
+ FileMode: 0 | os.ModeSymlink | 0777,
+ },
+ {
+ Path: turbopath.AnchoredSystemPath("two"),
+ Linkname: "three",
+ FileMode: 0 | os.ModeSymlink | 0777,
+ },
+ {
+ Path: turbopath.AnchoredSystemPath("three"),
+ Linkname: "real",
+ FileMode: 0 | os.ModeSymlink | 0777,
+ },
+ {
+ Path: turbopath.AnchoredSystemPath("real"),
+ FileMode: 0 | 0644,
+ },
+ },
+ wantDarwin: "07278fdf37db4b212352367f391377bd6bac8f361dd834ae5522d809539bcf3b34d046873c1b45876d7372251446bb12c32f9fa9824914c4a1a01f6d7a206702",
+ wantUnix: "07278fdf37db4b212352367f391377bd6bac8f361dd834ae5522d809539bcf3b34d046873c1b45876d7372251446bb12c32f9fa9824914c4a1a01f6d7a206702",
+ wantWindows: "d4dac527e40860ee1ba3fdf2b9b12a1eba385050cf4f5877558dd531f0ecf2a06952fd5f88b852ad99e010943ed7b7f1437b727796369524e85f0c06f25d62c9",
+ },
+ {
+ name: "subdirectory",
+ files: []createFileDefinition{
+ {
+ Path: turbopath.AnchoredSystemPath("parent"),
+ FileMode: 0 | os.ModeDir | 0755,
+ },
+ {
+ Path: turbopath.AnchoredSystemPath("parent/child"),
+ FileMode: 0 | 0644,
+ },
+ },
+ wantDarwin: "b513eea231daa84245d1d23d99fc398ccf17166ca49754ffbdcc1a3269cd75b7ad176a9c7095ff2481f71dca9fc350189747035f13d53b3a864e4fe35165233f",
+ wantUnix: "b513eea231daa84245d1d23d99fc398ccf17166ca49754ffbdcc1a3269cd75b7ad176a9c7095ff2481f71dca9fc350189747035f13d53b3a864e4fe35165233f",
+ wantWindows: "a8c3cba54e4dc214d3b21c3fa284d4032fe317d2f88943159efd5d16f3551ab53fae5c92ebf8acdd1bdb85d1238510b7938772cb11a0daa1b72b5e0f2700b5c7",
+ },
+ {
+ name: "symlink permissions",
+ files: []createFileDefinition{
+ {
+ Path: turbopath.AnchoredSystemPath("one"),
+ Linkname: "two",
+ FileMode: 0 | os.ModeSymlink | 0644,
+ },
+ },
+ wantDarwin: "3ea9d8a4581a0c2ba77557c72447b240c5ac622edcdac570a0bf597c276c2917b4ea73e6c373bbac593a480e396845651fa4b51e049531ff5d44c0adb807c2d9",
+ wantUnix: "99d953cbe1c0d8545e6f8382208fcefe14bcbefe39872f7b6310da14ac195b9a1b04b6d7b4b56f01a27216176193344a92488f99e124fcd68693f313f7137a1c",
+ wantWindows: "a4b1dc5c296f8ac4c9124727c1d84d70f72872c7bb4ced6d83ee312889e822baf1eaa72f88e624fb1aac4339d0a1f766ede77eabd2e4524eb26e89f883dc479d",
+ },
+ {
+ name: "unsupported types error",
+ files: []createFileDefinition{
+ {
+ Path: turbopath.AnchoredSystemPath("fifo"),
+ FileMode: 0 | os.ModeNamedPipe | 0644,
+ },
+ },
+ wantErr: errUnsupportedFileType,
+ },
+ }
+ for _, tt := range tests {
+ // getTestFunc returns the subtest body for either the plain or the compressed archive.
+ getTestFunc := func(compressed bool) func(t *testing.T) {
+ return func(t *testing.T) {
+ inputDir := turbopath.AbsoluteSystemPath(t.TempDir())
+ archiveDir := turbopath.AbsoluteSystemPath(t.TempDir())
+ var archivePath turbopath.AbsoluteSystemPath
+ if compressed {
+ archivePath = turbopath.AnchoredSystemPath("out.tar.zst").RestoreAnchor(archiveDir)
+ } else {
+ archivePath = turbopath.AnchoredSystemPath("out.tar").RestoreAnchor(archiveDir)
+ }
+
+ cacheItem, cacheCreateErr := Create(archivePath)
+ assert.NilError(t, cacheCreateErr, "Cache Create")
+
+ for _, file := range tt.files {
+ // Any failure while creating or adding an entry must be the expected error; the case ends there.
+ createErr := createEntry(t, inputDir, file)
+ if createErr != nil {
+ assert.ErrorIs(t, createErr, tt.wantErr)
+ assert.NilError(t, cacheItem.Close(), "Close")
+ return
+ }
+
+ addFileError := cacheItem.AddFile(inputDir, file.Path)
+ if addFileError != nil {
+ assert.ErrorIs(t, addFileError, tt.wantErr)
+ assert.NilError(t, cacheItem.Close(), "Close")
+ return
+ }
+ }
+
+ assert.NilError(t, cacheItem.Close(), "Cache Close")
+
+ // We only check for repeatability on compressed caches.
+ if compressed {
+ openedCacheItem, openedCacheItemErr := Open(archivePath)
+ assert.NilError(t, openedCacheItemErr, "Cache Open")
+
+ // We actually only need to compare the generated SHA.
+ // That ensures we got the same output. (Effectively snapshots.)
+ // This must be called after `Close` because the archive is only complete
+ // on disk once the `tar` and `zstd` writers have been closed and flushed.
+ shaOne, shaOneErr := openedCacheItem.GetSha()
+ assert.NilError(t, shaOneErr, "GetSha")
+ snapshot := hex.EncodeToString(shaOne)
+
+ switch runtime.GOOS {
+ case "darwin":
+ assert.Equal(t, snapshot, tt.wantDarwin, "Got expected hash.")
+ case "windows":
+ assert.Equal(t, snapshot, tt.wantWindows, "Got expected hash.")
+ default:
+ assert.Equal(t, snapshot, tt.wantUnix, "Got expected hash.")
+ }
+ assert.NilError(t, openedCacheItem.Close(), "Close")
+ }
+ }
+ }
+ t.Run(tt.name, getTestFunc(false))
+ t.Run(tt.name+"zst", getTestFunc(true))
+ }
+}
diff --git a/cli/internal/cacheitem/create_unix_test.go b/cli/internal/cacheitem/create_unix_test.go
new file mode 100644
index 0000000..812d1eb
--- /dev/null
+++ b/cli/internal/cacheitem/create_unix_test.go
@@ -0,0 +1,20 @@
+//go:build darwin || linux
+// +build darwin linux
+
+package cacheitem
+
+import (
+ "syscall"
+ "testing"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+ "gotest.tools/v3/assert"
+)
+
+func createFifo(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+ t.Helper()
+ path := fileDefinition.Path.RestoreAnchor(anchor)
+ fifoErr := syscall.Mknod(path.ToString(), syscall.S_IFIFO|0666, 0)
+ assert.NilError(t, fifoErr, "FIFO")
+ return fifoErr
+}
diff --git a/cli/internal/cacheitem/create_windows_test.go b/cli/internal/cacheitem/create_windows_test.go
new file mode 100644
index 0000000..2cbb8b9
--- /dev/null
+++ b/cli/internal/cacheitem/create_windows_test.go
@@ -0,0 +1,14 @@
+//go:build windows
+// +build windows
+
+package cacheitem
+
+import (
+ "testing"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// createFifo is the Windows variant: it creates nothing on disk and always
+// reports errUnsupportedFileType.
+func createFifo(t *testing.T, anchor turbopath.AbsoluteSystemPath, fileDefinition createFileDefinition) error {
+ return errUnsupportedFileType
+}
diff --git a/cli/internal/cacheitem/filepath.go b/cli/internal/cacheitem/filepath.go
new file mode 100644
index 0000000..4fd1681
--- /dev/null
+++ b/cli/internal/cacheitem/filepath.go
@@ -0,0 +1,162 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cacheitem
+
+import "os"
+
+// _separator is the OS-specific path separator that Clean writes between path elements.
+const _separator = os.PathSeparator
+
+// A lazybuf is a path buffer that is built lazily.
+// It supports appending bytes, reading back bytes that were already
+// appended, and producing the final string. As long as the appended bytes
+// match the input path byte-for-byte, no output buffer is allocated.
+type lazybuf struct {
+	path       string // input being cleaned, without the volume name
+	buf        []byte // output; nil until it diverges from path
+	w          int    // index of the next byte to write
+	volAndPath string // the full original input, volume name included
+	volLen     int    // length of the volume name at the start of volAndPath
+}
+
+// index returns the i-th byte written so far.
+func (b *lazybuf) index(i int) byte {
+	if b.buf == nil {
+		// Output is still identical to the input.
+		return b.path[i]
+	}
+	return b.buf[i]
+}
+
+// append writes c at the current position, allocating the output buffer
+// only when c differs from the input byte at that position.
+func (b *lazybuf) append(c byte) {
+	if b.buf == nil {
+		sameAsInput := b.w < len(b.path) && b.path[b.w] == c
+		if sameAsInput {
+			b.w++
+			return
+		}
+		// First divergence: materialize what has been written so far.
+		b.buf = make([]byte, len(b.path))
+		copy(b.buf, b.path[:b.w])
+	}
+	b.buf[b.w] = c
+	b.w++
+}
+
+// string returns the volume name followed by everything written so far.
+func (b *lazybuf) string() string {
+	if b.buf != nil {
+		return b.volAndPath[:b.volLen] + string(b.buf[:b.w])
+	}
+	return b.volAndPath[:b.volLen+b.w]
+}
+
+// Clean is extracted from stdlib and removes `FromSlash` processing
+// of the stdlib version.
+//
+// Clean returns the shortest path name equivalent to path
+// by purely lexical processing. It applies the following rules
+// iteratively until no further processing can be done:
+//
+// 1. Replace multiple Separator elements with a single one.
+// 2. Eliminate each . path name element (the current directory).
+// 3. Eliminate each inner .. path name element (the parent directory)
+// along with the non-.. element that precedes it.
+// 4. Eliminate .. elements that begin a rooted path:
+// that is, replace "/.." by "/" at the beginning of a path,
+// assuming Separator is '/'.
+//
+// The returned path ends in a slash only if it represents a root directory,
+// such as "/" on Unix or `C:\` on Windows.
+//
+// Unlike the stdlib version, the result is NOT passed through FromSlash:
+// separators that Clean itself writes are the OS Separator, but a path that
+// is returned untouched keeps whatever separators it came in with.
+//
+// If the result of this process is an empty string, Clean
+// returns the string ".".
+//
+// See also Rob Pike, “Lexical File Names in Plan 9 or
+// Getting Dot-Dot Right,”
+// https://9p.io/sys/doc/lexnames.html
+func Clean(path string) string {
+ originalPath := path
+ // Split off the volume name; only the remainder is cleaned.
+ volLen := volumeNameLen(path)
+ path = path[volLen:]
+ if path == "" {
+ if volLen > 1 && originalPath[1] != ':' {
+ // should be UNC
+ // ORIGINAL: return FromSlash(originalPath)
+ return originalPath
+ }
+ return originalPath + "."
+ }
+ rooted := os.IsPathSeparator(path[0])
+
+ // Invariants:
+ // reading from path; r is index of next byte to process.
+ // writing to buf; w is index of next byte to write.
+ // dotdot is index in buf where .. must stop, either because
+ // it is the leading slash or it is a leading ../../.. prefix.
+ n := len(path)
+ out := lazybuf{path: path, volAndPath: originalPath, volLen: volLen}
+ r, dotdot := 0, 0
+ if rooted {
+ out.append(_separator)
+ r, dotdot = 1, 1
+ }
+
+ for r < n {
+ switch {
+ case os.IsPathSeparator(path[r]):
+ // empty path element
+ r++
+ case path[r] == '.' && r+1 == n:
+ // . element
+ r++
+ case path[r] == '.' && os.IsPathSeparator(path[r+1]):
+ // ./ element
+ // (path[r+1] is in range: the previous case already handled r+1 == n.)
+ r++
+
+ for r < len(path) && os.IsPathSeparator(path[r]) {
+ r++
+ }
+ if out.w == 0 && volumeNameLen(path[r:]) > 0 {
+ // When joining prefix "." and an absolute path on Windows,
+ // the prefix should not be removed.
+ out.append('.')
+ }
+ case path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
+ // .. element: remove to last separator
+ r += 2
+ switch {
+ case out.w > dotdot:
+ // can backtrack
+ out.w--
+ for out.w > dotdot && !os.IsPathSeparator(out.index(out.w)) {
+ out.w--
+ }
+ case !rooted:
+ // cannot backtrack, but not rooted, so append .. element.
+ if out.w > 0 {
+ out.append(_separator)
+ }
+ out.append('.')
+ out.append('.')
+ dotdot = out.w
+ }
+ default:
+ // real path element.
+ // add slash if needed
+ if rooted && out.w != 1 || !rooted && out.w != 0 {
+ out.append(_separator)
+ }
+ // copy element
+ for ; r < n && !os.IsPathSeparator(path[r]); r++ {
+ out.append(path[r])
+ }
+ }
+ }
+
+ // Turn empty string into "."
+ if out.w == 0 {
+ out.append('.')
+ }
+
+ // ORIGINAL: return FromSlash(out.string())
+ return out.string()
+}
diff --git a/cli/internal/cacheitem/filepath_unix.go b/cli/internal/cacheitem/filepath_unix.go
new file mode 100644
index 0000000..d0f6786
--- /dev/null
+++ b/cli/internal/cacheitem/filepath_unix.go
@@ -0,0 +1,14 @@
+//go:build !windows
+// +build !windows
+
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cacheitem
+
+// volumeNameLen returns length of the leading volume name on Windows.
+// It returns 0 elsewhere.
+//
+// This is the non-Windows build, so the result is always 0 and path is ignored.
+func volumeNameLen(path string) int {
+ return 0
+}
diff --git a/cli/internal/cacheitem/filepath_windows.go b/cli/internal/cacheitem/filepath_windows.go
new file mode 100644
index 0000000..2c3b852
--- /dev/null
+++ b/cli/internal/cacheitem/filepath_windows.go
@@ -0,0 +1,50 @@
+//go:build windows
+// +build windows
+
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cacheitem
+
+// isSlash reports whether c is a backslash or a forward slash.
+func isSlash(c uint8) bool {
+	switch c {
+	case '\\', '/':
+		return true
+	}
+	return false
+}
+
+// volumeNameLen returns length of the leading volume name on Windows.
+// It returns 0 elsewhere.
+//
+// Two forms are recognized: a drive letter followed by a colon (length 2),
+// and a UNC prefix of the form `\\server\share` (length up to the end of the
+// share name). Anything else, including paths shorter than two bytes,
+// yields 0.
+func volumeNameLen(path string) int {
+ if len(path) < 2 {
+ return 0
+ }
+ // with drive letter
+ c := path[0]
+ if path[1] == ':' && ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
+ return 2
+ }
+ // is it UNC? https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
+ if l := len(path); l >= 5 && isSlash(path[0]) && isSlash(path[1]) &&
+ !isSlash(path[2]) && path[2] != '.' {
+ // first, leading `\\` and next shouldn't be `\`. its server name.
+ for n := 3; n < l-1; n++ {
+ // second, next '\' shouldn't be repeated.
+ if isSlash(path[n]) {
+ n++
+ // third, following something characters. its share name.
+ if !isSlash(path[n]) {
+ if path[n] == '.' {
+ break
+ }
+ // Consume the share name up to the next slash or the end of the path.
+ for ; n < l; n++ {
+ if isSlash(path[n]) {
+ break
+ }
+ }
+ return n
+ }
+ break
+ }
+ }
+ }
+ return 0
+}
diff --git a/cli/internal/cacheitem/restore.go b/cli/internal/cacheitem/restore.go
new file mode 100644
index 0000000..347b996
--- /dev/null
+++ b/cli/internal/cacheitem/restore.go
@@ -0,0 +1,200 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "errors"
+ "io"
+ "os"
+ "runtime"
+ "strings"
+
+ "github.com/DataDog/zstd"
+
+ "github.com/moby/sys/sequential"
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// Open returns an existing CacheItem at the specified path.
+//
+// The file is opened read-only. The item is treated as compressed when the
+// path ends in ".zst". None of the writer fields are populated.
+func Open(path turbopath.AbsoluteSystemPath) (*CacheItem, error) {
+	handle, openErr := sequential.OpenFile(path.ToString(), os.O_RDONLY, 0777)
+	if openErr != nil {
+		return nil, openErr
+	}
+
+	item := &CacheItem{Path: path, handle: handle}
+	item.compressed = strings.HasSuffix(path.ToString(), ".zst")
+	return item, nil
+}
+
+// Restore extracts a cache to a specified disk location.
+//
+// anchor is created if needed and every entry is restored beneath it.
+// Symlinks whose target does not exist yet are collected and restored after
+// the rest of the archive has been read.
+//
+// It returns the paths restored so far together with the first error
+// encountered. For compressed items, an error from closing the decompressor
+// is reported when nothing else failed.
+func (ci *CacheItem) Restore(anchor turbopath.AbsoluteSystemPath) (restored []turbopath.AnchoredSystemPath, err error) {
+	var tr *tar.Reader
+
+	// We're reading a tar, possibly wrapped in zstd.
+	if ci.compressed {
+		zr := zstd.NewReader(ci.handle)
+
+		// The `Close` function for compression effectively just returns the singular
+		// error field on the decompressor instance. This is extremely unlikely to be
+		// set without triggering one of the numerous other errors, but we should still
+		// handle that possible edge case.
+		//
+		// The results are named so that this deferred function can actually
+		// change what the caller receives; assigning to a plain local here
+		// would happen after the return values were already evaluated.
+		defer func() {
+			if closeErr := zr.Close(); closeErr != nil && err == nil {
+				err = closeErr
+			}
+		}()
+		tr = tar.NewReader(zr)
+	} else {
+		tr = tar.NewReader(ci.handle)
+	}
+
+	// On first attempt to restore it's possible that a link target doesn't exist.
+	// Save them and topsort them.
+	var symlinks []*tar.Header
+
+	restored = make([]turbopath.AnchoredSystemPath, 0)
+
+	restorePointErr := anchor.MkdirAll(0755)
+	if restorePointErr != nil {
+		return nil, restorePointErr
+	}
+
+	// We're going to make the following two assumptions here for "fast" path restoration:
+	// - All directories are enumerated in the `tar`.
+	// - The contents of the tar are enumerated depth-first.
+	//
+	// This allows us to avoid:
+	// - Attempts at recursive creation of directories.
+	// - Repetitive `lstat` on restore of a file.
+	//
+	// Violating these assumptions won't cause things to break but we're only going to maintain
+	// an `lstat` cache for the current tree. If you violate these assumptions and the current
+	// cache does not apply for your path, it will clobber and re-start from the common
+	// shared prefix.
+	dirCache := &cachedDirTree{
+		anchorAtDepth: []turbopath.AbsoluteSystemPath{anchor},
+	}
+
+	for {
+		header, trErr := tr.Next()
+		if trErr == io.EOF {
+			// The end, time to restore any missing links.
+			symlinksRestored, symlinksErr := topologicallyRestoreSymlinks(dirCache, anchor, symlinks, tr)
+			restored = append(restored, symlinksRestored...)
+			if symlinksErr != nil {
+				return restored, symlinksErr
+			}
+
+			break
+		}
+		if trErr != nil {
+			return restored, trErr
+		}
+
+		// The reader will not advance until tr.Next is called.
+		// We can treat this as file metadata + body reader.
+
+		// Attempt to place the file on disk.
+		file, restoreErr := restoreEntry(dirCache, anchor, header, tr)
+		if restoreErr != nil {
+			if errors.Is(restoreErr, errMissingSymlinkTarget) {
+				// Links get one shot to be valid, then they're accumulated, DAG'd, and restored on delay.
+				symlinks = append(symlinks, header)
+				continue
+			}
+			return restored, restoreErr
+		}
+		restored = append(restored, file)
+	}
+
+	// Any decompressor close error is filled in by the deferred function above.
+	return restored, nil
+}
+
+// restoreEntry is the entry point for all things read from the tar.
+//
+// It dispatches on the header's type flag to the directory, regular file,
+// or symlink restorer. Any other type yields errUnsupportedFileType.
+func restoreEntry(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header, reader *tar.Reader) (turbopath.AnchoredSystemPath, error) {
+	// We're permissive on creation, but restrictive on restoration.
+	// There is no need to prevent the cache creation in any case.
+	// And on restoration, if we fail, we simply run the task.
+	entryType := header.Typeflag
+
+	if entryType == tar.TypeDir {
+		return restoreDirectory(dirCache, anchor, header)
+	}
+	if entryType == tar.TypeReg {
+		return restoreRegular(dirCache, anchor, header, reader)
+	}
+	if entryType == tar.TypeSymlink {
+		return restoreSymlink(dirCache, anchor, header)
+	}
+
+	return "", errUnsupportedFileType
+}
+
+// canonicalizeName returns either an AnchoredSystemPath or an error.
+//
+// The name is validated first: a malformed name yields errNameMalformed,
+// and on Windows a name that is not Windows-safe yields
+// errNameWindowsUnsafe. A single trailing slash is then stripped before
+// converting to a system path.
+func canonicalizeName(name string) (turbopath.AnchoredSystemPath, error) {
+	// Assuming this was a `turbo`-created input, we currently have an AnchoredUnixPath.
+	// Assuming this is malicious input we don't really care if we do the wrong thing.
+	wellFormed, windowsSafe := checkName(name)
+
+	switch {
+	case !wellFormed:
+		// The future filename is not a well-formed AnchoredUnixPath.
+		return "", errNameMalformed
+	case runtime.GOOS == "windows" && !windowsSafe:
+		// The AnchoredUnixPath is not safe to be used on Windows.
+		return "", errNameWindowsUnsafe
+	}
+
+	// Directories will have a trailing slash. Remove it.
+	trimmed := strings.TrimSuffix(name, "/")
+	unixPath := turbopath.AnchoredUnixPathFromUpstream(trimmed)
+
+	return unixPath.ToSystemPath(), nil
+}
+
+// checkName returns `wellFormed, windowsSafe` via inspection of separators and traversal.
+//
+// An empty name is neither. Otherwise a name is well-formed unless it is
+// "." or "..", starts with "/", "./" or "../", ends with "/." or "/..", or
+// contains "//", "/./" or "/../". A name is Windows-safe unless it contains
+// a backslash. The two results are independent of each other.
+func checkName(name string) (bool, bool) {
+	if name == "" {
+		return false, false
+	}
+
+	windowsSafe := !strings.ContainsRune(name, '\\')
+
+	if name == "." || name == ".." {
+		return false, windowsSafe
+	}
+
+	for _, prefix := range [...]string{"/", "./", "../"} {
+		if strings.HasPrefix(name, prefix) {
+			return false, windowsSafe
+		}
+	}
+
+	for _, suffix := range [...]string{"/.", "/.."} {
+		if strings.HasSuffix(name, suffix) {
+			return false, windowsSafe
+		}
+	}
+
+	for _, infix := range [...]string{"//", "/./", "/../"} {
+		if strings.Contains(name, infix) {
+			return false, windowsSafe
+		}
+	}
+
+	return true, windowsSafe
+}
diff --git a/cli/internal/cacheitem/restore_directory.go b/cli/internal/cacheitem/restore_directory.go
new file mode 100644
index 0000000..4704d66
--- /dev/null
+++ b/cli/internal/cacheitem/restore_directory.go
@@ -0,0 +1,144 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// restoreDirectory restores a directory.
+//
+// The header name is canonicalized first, then the directory (and any
+// missing parents) is created beneath anchor with the header's mode.
+// On success the canonicalized name is returned.
+func restoreDirectory(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) {
+	processedName, nameErr := canonicalizeName(header.Name)
+	if nameErr != nil {
+		return "", nameErr
+	}
+
+	// We need to traverse `processedName` from base to root split at
+	// `os.Separator` to make sure we don't end up following a symlink
+	// outside of the restore path.
+
+	// Create the directory.
+	mkdirErr := safeMkdirAll(dirCache, anchor, processedName, header.Mode)
+	if mkdirErr != nil {
+		return "", mkdirErr
+	}
+
+	return processedName, nil
+}
+
+// cachedDirTree remembers the chain of directories most recently walked so
+// that a following path sharing a prefix can resume from the shared part.
+type cachedDirTree struct {
+	// anchorAtDepth[i] is the absolute path reached after i path segments;
+	// index 0 is the root anchor.
+	anchorAtDepth []turbopath.AbsoluteSystemPath
+	// prefix[i] is the segment that led from anchorAtDepth[i] to anchorAtDepth[i+1].
+	prefix []turbopath.RelativeSystemPath
+}
+
+// getStartingPoint splits path into segments, trims the cache down to the
+// leading segments it shares with path, and returns the cached anchor at
+// that depth together with the segments that still need to be processed.
+func (cr *cachedDirTree) getStartingPoint(path turbopath.AnchoredSystemPath) (turbopath.AbsoluteSystemPath, []turbopath.RelativeSystemPath) {
+	rawSegments := strings.Split(path.ToString(), string(os.PathSeparator))
+	segments := make([]turbopath.RelativeSystemPath, 0, len(rawSegments))
+	for _, raw := range rawSegments {
+		segments = append(segments, turbopath.RelativeSystemPathFromUpstream(raw))
+	}
+
+	// Count how many leading segments match the cached prefix.
+	shared := 0
+	for shared < len(cr.prefix) && shared < len(segments) && segments[shared] == cr.prefix[shared] {
+		shared++
+	}
+
+	// 0: root anchor, can't remove it.
+	cr.anchorAtDepth = cr.anchorAtDepth[:shared+1]
+
+	// 0: first prefix.
+	cr.prefix = cr.prefix[:shared]
+
+	return cr.anchorAtDepth[shared], segments[shared:]
+}
+
+// Update records one more level: newSegment was processed and led to anchor.
+func (cr *cachedDirTree) Update(anchor turbopath.AbsoluteSystemPath, newSegment turbopath.RelativeSystemPath) {
+	cr.prefix = append(cr.prefix, newSegment)
+	cr.anchorAtDepth = append(cr.anchorAtDepth, anchor)
+}
+
+// safeMkdirAll creates all directories, assuming that the leaf node is a directory.
+//
+// Each path segment that is not already covered by dirCache is checked, one
+// level at a time, to make sure it is not a symlink that leads outside of
+// anchor. Only when every segment passes is the full directory created with
+// the given mode.
+//
+// FIXME: Recheck the symlink cache before creating a directory.
+func safeMkdirAll(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, mode int64) error {
+	// Iterate through path segments by os.Separator, appending them onto the anchor.
+	// Check to see if that path segment is a symlink with a target outside of anchor.
+
+	// Pull the iteration starting point from the directory cache.
+	calculatedAnchor, pathSegments := dirCache.getStartingPoint(processedName)
+	for _, segment := range pathSegments {
+		// Use a distinct name for the result: `calculatedAnchor, err := ...`
+		// here would declare a new loop-scoped variable and the outer anchor
+		// would never advance, so every segment would be checked (and
+		// cached) against the starting directory instead of its real parent.
+		nextAnchor, checkPathErr := checkPath(anchor, calculatedAnchor, segment)
+		// We hit an existing directory or absolute path that was invalid.
+		if checkPathErr != nil {
+			return checkPathErr
+		}
+
+		// Otherwise we continue and check the next segment.
+		calculatedAnchor = nextAnchor
+		dirCache.Update(calculatedAnchor, segment)
+	}
+
+	// If we have made it here we know that it is safe to call os.MkdirAll
+	// on the Join of anchor and processedName.
+	//
+	// This could _still_ error, but we don't care.
+	return processedName.RestoreAnchor(anchor).MkdirAll(os.FileMode(mode))
+}
+
+// checkPath ensures that the resolved path of a single segment stays inside
+// originalAnchor, following the segment if it turns out to be a symlink.
+// It makes sure to never traverse outside of the anchor.
+//
+// accumulatedAnchor is the directory the segment is resolved against.
+// The return value is the location the segment resolves to: the joined path
+// for anything that is not a symlink (or cannot be stat'ed), or the link
+// target for a symlink that stays inside originalAnchor. errTraversal is
+// returned for absolute segments and for links that point outside.
+func checkPath(originalAnchor turbopath.AbsoluteSystemPath, accumulatedAnchor turbopath.AbsoluteSystemPath, segment turbopath.RelativeSystemPath) (turbopath.AbsoluteSystemPath, error) {
+	// Check if the segment itself is sneakily an absolute path...
+	// (looking at you, Windows. CON, AUX...)
+	if filepath.IsAbs(segment.ToString()) {
+		return "", errTraversal
+	}
+
+	// Find out if this portion of the path is a symlink.
+	combinedPath := accumulatedAnchor.Join(segment)
+	fileInfo, err := combinedPath.Lstat()
+
+	// Getting an error here means we failed to stat the path.
+	// Assume that means we're safe and continue.
+	if err != nil {
+		return combinedPath, nil
+	}
+
+	// Find out if we have a symlink.
+	isSymlink := fileInfo.Mode()&os.ModeSymlink != 0
+
+	// If we don't have a symlink it's safe.
+	if !isSymlink {
+		return combinedPath, nil
+	}
+
+	// Check to see if the symlink targets outside of the originalAnchor.
+	// We don't do eval symlinks because we could find ourself in a totally
+	// different place.
+
+	// 1. Get the target.
+	linkTarget, readLinkErr := combinedPath.Readlink()
+	if readLinkErr != nil {
+		return "", readLinkErr
+	}
+
+	// 2. See if the target is absolute.
+	if filepath.IsAbs(linkTarget) {
+		absoluteLinkTarget := turbopath.AbsoluteSystemPathFromUpstream(linkTarget)
+		// The target must live under the anchor, i.e. the anchor is the
+		// prefix of the target. The reverse test would accept any ancestor
+		// of the anchor (such as the filesystem root) and reject targets
+		// that are legitimately inside it. This matches the relative case below.
+		if absoluteLinkTarget.HasPrefix(originalAnchor) {
+			return absoluteLinkTarget, nil
+		}
+		return "", errTraversal
+	}
+
+	// 3. Target is relative (or absolute Windows on a Unix device)
+	relativeLinkTarget := turbopath.RelativeSystemPathFromUpstream(linkTarget)
+	computedTarget := accumulatedAnchor.UntypedJoin(linkTarget)
+	if computedTarget.HasPrefix(originalAnchor) {
+		// Need to recurse and make sure the target doesn't link out.
+		return checkPath(originalAnchor, accumulatedAnchor, relativeLinkTarget)
+	}
+	return "", errTraversal
+}
diff --git a/cli/internal/cacheitem/restore_directory_test.go b/cli/internal/cacheitem/restore_directory_test.go
new file mode 100644
index 0000000..f75bd47
--- /dev/null
+++ b/cli/internal/cacheitem/restore_directory_test.go
@@ -0,0 +1,103 @@
+package cacheitem
+
+import (
+ "reflect"
+ "testing"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// Test_cachedDirTree_getStartingPoint checks which cached anchor and which
+// remaining path segments getStartingPoint reports for a given cache state.
+func Test_cachedDirTree_getStartingPoint(t *testing.T) {
+	// Every anchor below is joined onto an empty root path.
+	root := turbopath.AbsoluteSystemPath("")
+	hello := root.UntypedJoin("hello")
+	helloWorld := root.UntypedJoin("hello", "world")
+
+	cases := []struct {
+		name string
+
+		// STATE
+		cachedDirTree cachedDirTree
+
+		// INPUT
+		path turbopath.AnchoredSystemPath
+
+		// OUTPUT
+		calculatedAnchor turbopath.AbsoluteSystemPath
+		pathSegments     []turbopath.RelativeSystemPath
+	}{
+		{
+			name: "hello world",
+			cachedDirTree: cachedDirTree{
+				anchorAtDepth: []turbopath.AbsoluteSystemPath{root},
+				prefix:        []turbopath.RelativeSystemPath{},
+			},
+			path:             turbopath.AnchoredUnixPath("hello/world").ToSystemPath(),
+			calculatedAnchor: root,
+			pathSegments:     []turbopath.RelativeSystemPath{"hello", "world"},
+		},
+		{
+			name: "has a cache",
+			cachedDirTree: cachedDirTree{
+				anchorAtDepth: []turbopath.AbsoluteSystemPath{
+					root,
+					hello,
+				},
+				prefix: []turbopath.RelativeSystemPath{"hello"},
+			},
+			path:             turbopath.AnchoredUnixPath("hello/world").ToSystemPath(),
+			calculatedAnchor: hello,
+			pathSegments:     []turbopath.RelativeSystemPath{"world"},
+		},
+		{
+			name: "ask for yourself",
+			cachedDirTree: cachedDirTree{
+				anchorAtDepth: []turbopath.AbsoluteSystemPath{
+					root,
+					hello,
+					helloWorld,
+				},
+				prefix: []turbopath.RelativeSystemPath{"hello", "world"},
+			},
+			path:             turbopath.AnchoredUnixPath("hello/world").ToSystemPath(),
+			calculatedAnchor: helloWorld,
+			pathSegments:     []turbopath.RelativeSystemPath{},
+		},
+		{
+			name: "three layer cake",
+			cachedDirTree: cachedDirTree{
+				anchorAtDepth: []turbopath.AbsoluteSystemPath{
+					root,
+					hello,
+					helloWorld,
+				},
+				prefix: []turbopath.RelativeSystemPath{"hello", "world"},
+			},
+			path:             turbopath.AnchoredUnixPath("hello/world/again").ToSystemPath(),
+			calculatedAnchor: helloWorld,
+			pathSegments:     []turbopath.RelativeSystemPath{"again"},
+		},
+		{
+			name: "outside of cache hierarchy",
+			cachedDirTree: cachedDirTree{
+				anchorAtDepth: []turbopath.AbsoluteSystemPath{
+					root,
+					hello,
+					helloWorld,
+				},
+				prefix: []turbopath.RelativeSystemPath{"hello", "world"},
+			},
+			path:             turbopath.AnchoredUnixPath("somewhere/else").ToSystemPath(),
+			calculatedAnchor: root,
+			pathSegments:     []turbopath.RelativeSystemPath{"somewhere", "else"},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			tree := tc.cachedDirTree
+			gotAnchor, gotSegments := tree.getStartingPoint(tc.path)
+			if !reflect.DeepEqual(gotAnchor, tc.calculatedAnchor) {
+				t.Errorf("cachedDirTree.getStartingPoint() calculatedAnchor = %v, want %v", gotAnchor, tc.calculatedAnchor)
+			}
+			if !reflect.DeepEqual(gotSegments, tc.pathSegments) {
+				t.Errorf("cachedDirTree.getStartingPoint() pathSegments = %v, want %v", gotSegments, tc.pathSegments)
+			}
+		})
+	}
+}
diff --git a/cli/internal/cacheitem/restore_regular.go b/cli/internal/cacheitem/restore_regular.go
new file mode 100644
index 0000000..ed8946e
--- /dev/null
+++ b/cli/internal/cacheitem/restore_regular.go
@@ -0,0 +1,46 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "io"
+ "os"
+
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// restoreRegular restores a regular file from the archive.
+//
+// header.Name is canonicalized, the parent directories are created through
+// safeMkdirFile, and the entry's contents are streamed from reader into a
+// file opened (created or truncated) with header.Mode. It returns the
+// canonicalized, anchored name of the restored file, or the first error
+// encountered.
+func restoreRegular(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header, reader *tar.Reader) (turbopath.AnchoredSystemPath, error) {
+	// Assuming this was a `turbo`-created input, we currently have an AnchoredUnixPath.
+	// Assuming this is malicious input we don't really care if we do the wrong thing.
+	processedName, err := canonicalizeName(header.Name)
+	if err != nil {
+		return "", err
+	}
+
+	// We need to traverse `processedName` from base to root split at
+	// `os.Separator` to make sure we don't end up following a symlink
+	// outside of the restore path.
+	if err := safeMkdirFile(dirCache, anchor, processedName, header.Mode); err != nil {
+		return "", err
+	}
+
+	// Create the file.
+	f, err := processedName.RestoreAnchor(anchor).OpenFile(os.O_WRONLY|os.O_TRUNC|os.O_CREATE, os.FileMode(header.Mode))
+	if err != nil {
+		return "", err
+	}
+
+	if _, err := io.Copy(f, reader); err != nil {
+		// Don't leak the handle on a failed copy. The copy error is the one
+		// worth reporting, so a secondary close error is dropped.
+		_ = f.Close()
+		return "", err
+	}
+
+	// Close explicitly (not deferred) so that a failure while closing the
+	// freshly written file is reported to the caller.
+	if err := f.Close(); err != nil {
+		return "", err
+	}
+	return processedName, nil
+}
+
+// safeMkdirFile creates the directories leading up to processedName, treating
+// the last path element as a file that the caller creates itself. Nothing is
+// created when processedName sits directly inside the anchor. The mode
+// argument is not consulted; parents are always requested with 0755.
+func safeMkdirFile(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, mode int64) error {
+	parent := processedName.Dir()
+	if parent == "." {
+		// A file at the root of the anchor has no parents to create.
+		return nil
+	}
+
+	return safeMkdirAll(dirCache, anchor, parent, 0755)
+}
diff --git a/cli/internal/cacheitem/restore_symlink.go b/cli/internal/cacheitem/restore_symlink.go
new file mode 100644
index 0000000..4cb29f5
--- /dev/null
+++ b/cli/internal/cacheitem/restore_symlink.go
@@ -0,0 +1,180 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "io/fs"
+ "os"
+ "path/filepath"
+
+ "github.com/pyr-sh/dag"
+ "github.com/vercel/turbo/cli/internal/turbopath"
+)
+
+// restoreSymlink restores a symlink whose target is already on disk. When the
+// lexically resolved target cannot be lstat'ed it creates nothing and returns
+// errMissingSymlinkTarget.
+func restoreSymlink(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) {
+	processedName, err := canonicalizeName(header.Name)
+	if err != nil {
+		return "", err
+	}
+
+	// Work out where the link would point once restored and make sure
+	// something exists there.
+	target := canonicalizeLinkname(anchor, processedName, header.Linkname)
+	if _, statErr := os.Lstat(target); statErr != nil {
+		return "", errMissingSymlinkTarget
+	}
+
+	return actuallyRestoreSymlink(dirCache, anchor, processedName, header)
+}
+
+// restoreSymlinkMissingTarget restores a symlink without checking whether its
+// target exists, so a dangling link is not an error here.
+func restoreSymlinkMissingTarget(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) {
+	processedName, err := canonicalizeName(header.Name)
+	if err != nil {
+		return "", err
+	}
+
+	return actuallyRestoreSymlink(dirCache, anchor, processedName, header)
+}
+
+// actuallyRestoreSymlink creates the symlink described by header at
+// processedName beneath anchor, removing whatever already occupies that
+// location, and returns processedName on success.
+func actuallyRestoreSymlink(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, header *tar.Header) (turbopath.AnchoredSystemPath, error) {
+	// The parents of `processedName` are created through safeMkdirFile so
+	// that we don't end up following a symlink outside of the restore path.
+	if err := safeMkdirFile(dirCache, anchor, processedName, header.Mode); err != nil {
+		return "", err
+	}
+
+	// This is where the symlink itself will live.
+	linkPath := processedName.RestoreAnchor(anchor)
+
+	// Clear out any existing object at that location. A failure is ignored
+	// on purpose: it resurfaces when the symlink is created.
+	_ = linkPath.Remove()
+
+	// The target is the _original_ header.Linkname, verbatim.
+	// This does not support file names with `\` in them in a cross-platform manner.
+	if err := linkPath.Symlink(header.Linkname); err != nil {
+		return "", err
+	}
+
+	// Darwin allows you to change the permissions of a symlink.
+	if err := linkPath.Lchmod(fs.FileMode(header.Mode)); err != nil {
+		return "", err
+	}
+
+	return processedName, nil
+}
+
+// topologicallyRestoreSymlinks ensures that targets of symlinks are created in advance
+// of the things that link to them. It does this by topologically sorting all
+// of the symlinks. This also enables us to ensure we do not create cycles.
+//
+// Each header contributes two vertices (the canonicalized location of the
+// link and of its target) and one edge from the target to the link. The
+// graph is rejected with errCycleDetected if it contains a cycle; otherwise
+// it is walked depth-first from every vertex without up-edges, and each
+// vertex that corresponds to a header is restored without requiring its
+// target to exist.
+//
+// It returns the anchored paths restored so far alongside any error, except
+// for a malformed header name, which returns a nil slice. The tr argument is
+// not used.
+func topologicallyRestoreSymlinks(dirCache *cachedDirTree, anchor turbopath.AbsoluteSystemPath, symlinks []*tar.Header, tr *tar.Reader) ([]turbopath.AnchoredSystemPath, error) {
+	restored := make([]turbopath.AnchoredSystemPath, 0)
+	lookup := make(map[string]*tar.Header)
+
+	var g dag.AcyclicGraph
+	for _, header := range symlinks {
+		// Bail out before the name is used for anything: on failure it
+		// carries no meaningful value.
+		processedName, err := canonicalizeName(header.Name)
+		if err != nil {
+			return nil, err
+		}
+
+		processedSourcename := canonicalizeLinkname(anchor, processedName, processedName.ToString())
+		processedLinkname := canonicalizeLinkname(anchor, processedName, header.Linkname)
+
+		g.Add(processedSourcename)
+		g.Add(processedLinkname)
+		g.Connect(dag.BasicEdge(processedLinkname, processedSourcename))
+		// A later header for the same location replaces an earlier one.
+		lookup[processedSourcename] = header
+	}
+
+	cycles := g.Cycles()
+	if cycles != nil {
+		return restored, errCycleDetected
+	}
+
+	// Start the walk from every vertex that has no up-edges.
+	roots := make(dag.Set)
+	for _, v := range g.Vertices() {
+		if g.UpEdges(v).Len() == 0 {
+			roots.Add(v)
+		}
+	}
+
+	walkFunc := func(vertex dag.Vertex, depth int) error {
+		key, ok := vertex.(string)
+		if !ok {
+			return nil
+		}
+		// Vertices that are only link targets have no header to restore.
+		header, exists := lookup[key]
+		if !exists {
+			return nil
+		}
+
+		file, restoreErr := restoreSymlinkMissingTarget(dirCache, anchor, header)
+		if restoreErr != nil {
+			return restoreErr
+		}
+
+		restored = append(restored, file)
+		return nil
+	}
+
+	walkError := g.DepthFirstWalk(roots, walkFunc)
+	if walkError != nil {
+		return restored, walkError
+	}
+
+	return restored, nil
+}
+
+// canonicalizeLinkname computes, purely lexically, the path that linkname
+// will resolve to on this system once it has been restored verbatim as the
+// target of the symlink at processedName.
+func canonicalizeLinkname(anchor turbopath.AbsoluteSystemPath, processedName turbopath.AnchoredSystemPath, linkname string) string {
+	// Nothing is known about linkname. It may be any of:
+	//
+	// - Absolute Unix Path
+	// - Absolute Windows Path
+	// - Relative Unix Path
+	// - Relative Windows Path
+	//
+	// Unix and Windows paths also can't _truly_ be told apart.
+	// `/Users/turbobot/weird-filenames/\foo\/lol` is a valid file on Unix,
+	// but slash conversion breaks it. The same goes for
+	// `i\am\a\normal\unix\file\but\super\nested\on\windows`.
+	//
+	// Nor is it safe to assume that a link target found on one platform is
+	// meant for that platform: the author may be producing an artifact on a
+	// Unix device that is supposed to work on Windows.
+	//
+	// So link targets are restored _verbatim_: no modification, no slash
+	// conversion. DAG sorting still needs a canonical form, so we
+	// canonicalize them as if they had been restored verbatim.
+	//
+	// 0. `Clean` is a version extracted from stdlib which does nothing but
+	//    separator and traversal collapsing.
+	cleaned := Clean(linkname)
+
+	// 1. A target that is absolute _on the current platform_ is canonical
+	//    by rule.
+	if filepath.IsAbs(cleaned) {
+		return cleaned
+	}
+
+	// What is left:
+	// - Absolute (other platform) Path
+	// - Relative Unix Path
+	// - Relative Windows Path
+	//
+	// All of these are treated as a relative path, whichever separators
+	// appear in them and wherever they appear, because that is also how the
+	// OS treats them when they are a link target.
+	//
+	// The pieces are joined by hand to avoid calls to stdlib's `Clean`.
+	parentDir := processedName.RestoreAnchor(anchor).Dir().ToString()
+	return Clean(parentDir + string(os.PathSeparator) + cleaned)
+}
diff --git a/cli/internal/cacheitem/restore_test.go b/cli/internal/cacheitem/restore_test.go
new file mode 100644
index 0000000..a0a33d6
--- /dev/null
+++ b/cli/internal/cacheitem/restore_test.go
@@ -0,0 +1,1493 @@
+package cacheitem
+
+import (
+ "archive/tar"
+ "errors"
+ "fmt"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "reflect"
+ "runtime"
+ "syscall"
+ "testing"
+
+ "github.com/DataDog/zstd"
+ "github.com/vercel/turbo/cli/internal/turbopath"
+ "gotest.tools/v3/assert"
+)
+
+// tarFile describes one entry that generateTar writes into a test archive:
+// the tar header plus the bytes written after it.
+type tarFile struct {
+ // Body is written to the archive right after the header. For regular
+ // files generateTar also sets the header's Size from its length.
+ Body string
+ *tar.Header
+}
+
+// restoreFile describes what a test expects to find on disk after a restore;
+// assertFileExists checks one of these against the filesystem.
+type restoreFile struct {
+ // Name is the expected entry, as a Unix-style path relative to the anchor.
+ Name turbopath.AnchoredUnixPath
+ // Linkname is the expected symlink target. It is only compared when
+ // FileMode has os.ModeSymlink set.
+ Linkname string
+ // FileMode holds the expected type bits and permission bits.
+ fs.FileMode
+}
+
+// generateTar writes the given entries, in order, to an `out.tar` inside a
+// fresh temporary directory and returns the archive's path. It exists to
+// build archives that Turborepo would rarely or never produce itself
+// (malicious or pathological inputs) so that restore attempts can be checked
+// against them.
+func generateTar(t *testing.T, files []tarFile) turbopath.AbsoluteSystemPath {
+	t.Helper()
+	archivePath := turbopath.AbsoluteSystemPath(t.TempDir()).UntypedJoin("out.tar")
+
+	out, createErr := archivePath.Create()
+	assert.NilError(t, createErr, "os.Create")
+
+	writer := tar.NewWriter(out)
+	for _, entry := range files {
+		// Regular files must announce their size before the body is written.
+		if entry.Header.Typeflag == tar.TypeReg {
+			entry.Header.Size = int64(len(entry.Body))
+		}
+
+		assert.NilError(t, writer.WriteHeader(entry.Header), "tw.WriteHeader")
+
+		_, bodyErr := writer.Write([]byte(entry.Body))
+		assert.NilError(t, bodyErr, "tw.Write")
+	}
+
+	assert.NilError(t, writer.Close(), "tw.Close")
+	assert.NilError(t, out.Close(), "handle.Close")
+
+	return archivePath
+}
+
+// compressTar zstd-compresses an existing tar file into a sibling `.zst`
+// file and returns the new path. Compression is kept separate from tar
+// creation so both the plain and the compressed restore paths are tested
+// against the very same archive bytes.
+func compressTar(t *testing.T, archivePath turbopath.AbsoluteSystemPath) turbopath.AbsoluteSystemPath {
+	t.Helper()
+
+	src, openErr := archivePath.Open()
+	assert.NilError(t, openErr, "os.Open")
+
+	compressedPath := archivePath + ".zst"
+	dst, createErr := compressedPath.Create()
+	assert.NilError(t, createErr, "os.Create")
+
+	encoder := zstd.NewWriter(dst)
+	_, copyErr := io.Copy(encoder, src)
+	assert.NilError(t, copyErr, "io.Copy")
+
+	// Close the encoder before the file it writes into.
+	assert.NilError(t, encoder.Close(), "zw.Close")
+	assert.NilError(t, src.Close(), "inputHandle.Close")
+	assert.NilError(t, dst.Close(), "outputHandle.Close")
+
+	return compressedPath
+}
+
+// generateAnchor creates an `anchor` directory inside a fresh temporary
+// directory and returns its path for use as a restore destination.
+func generateAnchor(t *testing.T) turbopath.AbsoluteSystemPath {
+	t.Helper()
+	anchor := turbopath.AbsoluteSystemPath(t.TempDir()).UntypedJoin("anchor")
+
+	assert.NilError(t, anchor.Mkdir(0777), "Mkdir")
+
+	return anchor
+}
+
+// assertFileExists checks that diskFile is present beneath anchor with the
+// expected permission bits and type bits and, for symlinks, the expected
+// (verbatim) link target.
+func assertFileExists(t *testing.T, anchor turbopath.AbsoluteSystemPath, diskFile restoreFile) {
+	t.Helper()
+	// The expected name is converted to a system path as-is, with no
+	// further validation.
+	processedName := diskFile.Name.ToSystemPath()
+	fullName := processedName.RestoreAnchor(anchor)
+	info, err := fullName.Lstat()
+	assert.NilError(t, err, "Lstat")
+
+	// Compare the permission bits and the remaining (type) bits separately.
+	gotMode, wantMode := info.Mode(), diskFile.FileMode
+	assert.Equal(t, gotMode&fs.ModePerm, wantMode&fs.ModePerm, "File has the expected permissions: "+processedName)
+	assert.Equal(t, gotMode|fs.ModePerm, wantMode|fs.ModePerm, "File has the expected mode.")
+
+	if wantMode&os.ModeSymlink == 0 {
+		return
+	}
+
+	linkname, err := fullName.Readlink()
+	assert.NilError(t, err, "Readlink")
+
+	// We restore Linkname verbatim.
+	assert.Equal(t, linkname, diskFile.Linkname, "Link target matches.")
+}
+
+// TestOpen builds a tar archive for every table entry, restores it into a
+// fresh anchor directory (once compressed with zstd, once uncompressed) and
+// checks the returned error, the reported list of restored paths, and the
+// files found on disk.
+//
+// Expectations are given per platform. The `windows` variant of wantOutput
+// and wantFiles is only used on Windows and only when it is non-nil;
+// otherwise the `unix` variant applies.
+func TestOpen(t *testing.T) {
+	type wantErr struct {
+		unix    error
+		windows error
+	}
+	type wantOutput struct {
+		unix    []turbopath.AnchoredSystemPath
+		windows []turbopath.AnchoredSystemPath
+	}
+	type wantFiles struct {
+		unix    []restoreFile
+		windows []restoreFile
+	}
+	tests := []struct {
+		name       string
+		tarFiles   []tarFile
+		wantOutput wantOutput
+		wantFiles  wantFiles
+		wantErr    wantErr
+	}{
+		{
+			name: "cache optimized",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "one/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/file-one",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/file-two",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/a/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/a/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/b/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/b/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "one",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/three",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/three/file-one",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/three/file-two",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/a",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/a/file",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/b",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/b/file",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "one",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/three",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/three/file-one",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/three/file-two",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/a",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/a/file",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/b",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/b/file",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{
+					"one",
+					"one/two",
+					"one/two/three",
+					"one/two/three/file-one",
+					"one/two/three/file-two",
+					"one/two/a",
+					"one/two/a/file",
+					"one/two/b",
+					"one/two/b/file",
+				}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "pathological cache works",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "one/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/a/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/b/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/a/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/b/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/file-one",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one/two/three/file-two",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "one",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/three",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/three/file-one",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/three/file-two",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/a",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/a/file",
+						FileMode: 0644,
+					},
+					{
+						Name:     "one/two/b",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "one/two/b/file",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "one",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/three",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/three/file-one",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/three/file-two",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/a",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/a/file",
+						FileMode: 0666,
+					},
+					{
+						Name:     "one/two/b",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "one/two/b/file",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{
+					"one",
+					"one/two",
+					"one/two/a",
+					"one/two/b",
+					"one/two/three",
+					"one/two/a/file",
+					"one/two/b/file",
+					"one/two/three/file-one",
+					"one/two/three/file-two",
+				}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "hello world",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "target",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "target",
+				},
+				{
+					Header: &tar.Header{
+						Name:     "source",
+						Linkname: "target",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "source",
+						Linkname: "target",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+					{
+						Name:     "target",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "source",
+						Linkname: "target",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+					{
+						Name:     "target",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"target", "source"}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "nested file",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "folder/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "folder/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "file",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "folder",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "folder/file",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "folder",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "folder/file",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"folder", "folder/file"}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "nested symlink",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "folder/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "folder/symlink",
+						Linkname: "../",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "folder/symlink/folder-sibling",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "folder-sibling",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "folder",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "folder/symlink",
+						FileMode: 0 | os.ModeSymlink | 0777,
+						Linkname: "../",
+					},
+					{
+						Name:     "folder/symlink/folder-sibling",
+						FileMode: 0644,
+					},
+					{
+						Name:     "folder-sibling",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "folder",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "folder/symlink",
+						FileMode: 0 | os.ModeSymlink | 0666,
+						Linkname: "..\\",
+					},
+					{
+						Name:     "folder/symlink/folder-sibling",
+						FileMode: 0666,
+					},
+					{
+						Name:     "folder-sibling",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"folder", "folder/symlink", "folder/symlink/folder-sibling"}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "pathological symlinks",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "one",
+						Linkname: "two",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "two",
+						Linkname: "three",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "three",
+						Linkname: "real",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "real",
+						Typeflag: tar.TypeReg,
+						Mode:     0755,
+					},
+					Body: "real",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "one",
+						Linkname: "two",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+					{
+						Name:     "two",
+						Linkname: "three",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+					{
+						Name:     "three",
+						Linkname: "real",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+					{
+						Name:     "real",
+						FileMode: 0 | 0755,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "one",
+						Linkname: "two",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+					{
+						Name:     "two",
+						Linkname: "three",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+					{
+						Name:     "three",
+						Linkname: "real",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+					{
+						Name:     "real",
+						FileMode: 0 | 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"real", "three", "two", "one"}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "place file at dir location",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "folder-not-file/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "folder-not-file/subfile",
+						Typeflag: tar.TypeReg,
+						Mode:     0755,
+					},
+					Body: "subfile",
+				},
+				{
+					Header: &tar.Header{
+						Name:     "folder-not-file",
+						Typeflag: tar.TypeReg,
+						Mode:     0755,
+					},
+					Body: "this shouldn't work",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "folder-not-file",
+						FileMode: 0 | os.ModeDir | 0755,
+					},
+					{
+						Name:     "folder-not-file/subfile",
+						FileMode: 0755,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "folder-not-file",
+						FileMode: 0 | os.ModeDir | 0777,
+					},
+					{
+						Name:     "folder-not-file/subfile",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"folder-not-file", "folder-not-file/subfile"}.ToSystemPathArray(),
+			},
+			wantErr: wantErr{
+				unix:    syscall.EISDIR,
+				windows: syscall.EISDIR,
+			},
+		},
+		// {
+		// 	name: "missing symlink with file at subdir",
+		// 	tarFiles: []tarFile{
+		// 		{
+		// 			Header: &tar.Header{
+		// 				Name:     "one",
+		// 				Linkname: "two",
+		// 				Typeflag: tar.TypeSymlink,
+		// 				Mode:     0777,
+		// 			},
+		// 		},
+		// 		{
+		// 			Header: &tar.Header{
+		// 				Name:     "one/file",
+		// 				Typeflag: tar.TypeReg,
+		// 				Mode:     0755,
+		// 			},
+		// 			Body: "file",
+		// 		},
+		// 	},
+		// 	wantFiles: wantFiles{
+		// 		unix: []restoreFile{
+		// 			{
+		// 				Name:     "one",
+		// 				Linkname: "two",
+		// 				FileMode: 0 | os.ModeSymlink | 0777,
+		// 			},
+		// 		},
+		// 	},
+		// 	wantOutput: wantOutput{
+		// 		unix:    turbopath.AnchoredUnixPathArray{"one"}.ToSystemPathArray(),
+		// 		windows: nil,
+		// 	},
+		// 	wantErr: wantErr{
+		// 		unix:    os.ErrExist,
+		// 		windows: os.ErrExist,
+		// 	},
+		// },
+		{
+			name: "symlink cycle",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "one",
+						Linkname: "two",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "two",
+						Linkname: "three",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "three",
+						Linkname: "one",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{},
+			},
+			wantOutput: wantOutput{
+				unix: []turbopath.AnchoredSystemPath{},
+			},
+			wantErr: wantErr{
+				unix:    errCycleDetected,
+				windows: errCycleDetected,
+			},
+		},
+		{
+			name: "symlink clobber",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "one",
+						Linkname: "two",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one",
+						Linkname: "three",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "one",
+						Linkname: "real",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "real",
+						Typeflag: tar.TypeReg,
+						Mode:     0755,
+					},
+					Body: "real",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "one",
+						Linkname: "real",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+					{
+						Name:     "real",
+						FileMode: 0755,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "one",
+						Linkname: "real",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+					{
+						Name:     "real",
+						FileMode: 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"real", "one"}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "symlink traversal",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "escape",
+						Linkname: "../",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "escape/file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "file",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "escape",
+						Linkname: "../",
+						FileMode: 0 | os.ModeSymlink | 0777,
+					},
+				},
+				windows: []restoreFile{
+					{
+						Name:     "escape",
+						Linkname: "..\\",
+						FileMode: 0 | os.ModeSymlink | 0666,
+					},
+				},
+			},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{"escape"}.ToSystemPathArray(),
+			},
+			wantErr: wantErr{
+				unix:    errTraversal,
+				windows: errTraversal,
+			},
+		},
+		{
+			name: "Double indirection: file",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "up",
+						Linkname: "../",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "link",
+						Linkname: "up",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "link/outside-file",
+						Typeflag: tar.TypeReg,
+						Mode:     0755,
+					},
+				},
+			},
+			wantErr: wantErr{unix: errTraversal, windows: errTraversal},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{
+					"up",
+					"link",
+				}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "Double indirection: folder",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "up",
+						Linkname: "../",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "link",
+						Linkname: "up",
+						Typeflag: tar.TypeSymlink,
+						Mode:     0777,
+					},
+				},
+				{
+					Header: &tar.Header{
+						Name:     "link/level-one/level-two/",
+						Typeflag: tar.TypeDir,
+						Mode:     0755,
+					},
+				},
+			},
+			wantErr: wantErr{unix: errTraversal, windows: errTraversal},
+			wantOutput: wantOutput{
+				unix: turbopath.AnchoredUnixPathArray{
+					"up",
+					"link",
+				}.ToSystemPathArray(),
+			},
+		},
+		{
+			name: "name traversal",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "../escape",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "file",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{},
+			},
+			wantOutput: wantOutput{
+				unix: []turbopath.AnchoredSystemPath{},
+			},
+			wantErr: wantErr{
+				unix:    errNameMalformed,
+				windows: errNameMalformed,
+			},
+		},
+		{
+			name: "windows unsafe",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "back\\slash\\file",
+						Typeflag: tar.TypeReg,
+						Mode:     0644,
+					},
+					Body: "file",
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{
+					{
+						Name:     "back\\slash\\file",
+						FileMode: 0644,
+					},
+				},
+				windows: []restoreFile{},
+			},
+			wantOutput: wantOutput{
+				unix:    turbopath.AnchoredUnixPathArray{"back\\slash\\file"}.ToSystemPathArray(),
+				windows: turbopath.AnchoredUnixPathArray{}.ToSystemPathArray(),
+			},
+			wantErr: wantErr{
+				unix:    nil,
+				windows: errNameWindowsUnsafe,
+			},
+		},
+		{
+			name: "fifo (and others) unsupported",
+			tarFiles: []tarFile{
+				{
+					Header: &tar.Header{
+						Name:     "fifo",
+						Typeflag: tar.TypeFifo,
+					},
+				},
+			},
+			wantFiles: wantFiles{
+				unix: []restoreFile{},
+			},
+			wantOutput: wantOutput{
+				unix: []turbopath.AnchoredSystemPath{},
+			},
+			wantErr: wantErr{
+				unix:    errUnsupportedFileType,
+				windows: errUnsupportedFileType,
+			},
+		},
+	}
+	for _, tt := range tests {
+		// getTestFunc builds the subtest body for either the zstd-compressed
+		// or the plain variant of the archive. It is invoked within the same
+		// loop iteration, so it always sees the current tt.
+		getTestFunc := func(compressed bool) func(t *testing.T) {
+			return func(t *testing.T) {
+				var archivePath turbopath.AbsoluteSystemPath
+				if compressed {
+					archivePath = compressTar(t, generateTar(t, tt.tarFiles))
+				} else {
+					archivePath = generateTar(t, tt.tarFiles)
+				}
+				anchor := generateAnchor(t)
+
+				cacheItem, err := Open(archivePath)
+				assert.NilError(t, err, "Open")
+
+				restoreOutput, restoreErr := cacheItem.Restore(anchor)
+				var desiredErr error
+				if runtime.GOOS == "windows" {
+					desiredErr = tt.wantErr.windows
+				} else {
+					desiredErr = tt.wantErr.unix
+				}
+				if desiredErr != nil {
+					if !errors.Is(restoreErr, desiredErr) {
+						// Report the error that was actually compared, not the
+						// struct holding the expectations for both platforms.
+						t.Errorf("wanted err: %v, got err: %v", desiredErr, restoreErr)
+					}
+				} else {
+					assert.NilError(t, restoreErr, "Restore")
+				}
+
+				outputComparison := tt.wantOutput.unix
+				if runtime.GOOS == "windows" && tt.wantOutput.windows != nil {
+					outputComparison = tt.wantOutput.windows
+				}
+
+				if !reflect.DeepEqual(restoreOutput, outputComparison) {
+					t.Errorf("Restore() = %v, want %v", restoreOutput, outputComparison)
+				}
+
+				// Check files on disk.
+				filesComparison := tt.wantFiles.unix
+				if runtime.GOOS == "windows" && tt.wantFiles.windows != nil {
+					filesComparison = tt.wantFiles.windows
+				}
+				for _, diskFile := range filesComparison {
+					assertFileExists(t, anchor, diskFile)
+				}
+
+				assert.NilError(t, cacheItem.Close(), "Close")
+			}
+		}
+		t.Run(tt.name+"zst", getTestFunc(true))
+		t.Run(tt.name, getTestFunc(false))
+	}
+}
+
+// Test_checkName feeds a table of candidate archive entry names to checkName
+// and verifies both of its results: whether the name is well-formed and
+// whether it is safe on Windows. Each path becomes its own subtest.
+func Test_checkName(t *testing.T) {
+	cases := []struct {
+		path        string
+		wellFormed  bool
+		windowsSafe bool
+	}{
+		// Empty
+		{path: "", wellFormed: false, windowsSafe: false},
+		// Bad prefix
+		{path: ".", wellFormed: false, windowsSafe: true},
+		{path: "..", wellFormed: false, windowsSafe: true},
+		{path: "/", wellFormed: false, windowsSafe: true},
+		{path: "./", wellFormed: false, windowsSafe: true},
+		{path: "../", wellFormed: false, windowsSafe: true},
+		// Bad prefix, suffixed
+		{path: "/a", wellFormed: false, windowsSafe: true},
+		{path: "./a", wellFormed: false, windowsSafe: true},
+		{path: "../a", wellFormed: false, windowsSafe: true},
+		// Bad suffix
+		{path: "/.", wellFormed: false, windowsSafe: true},
+		{path: "/..", wellFormed: false, windowsSafe: true},
+		// Bad suffix, with prefix
+		{path: "a/.", wellFormed: false, windowsSafe: true},
+		{path: "a/..", wellFormed: false, windowsSafe: true},
+		// Bad middle
+		{path: "//", wellFormed: false, windowsSafe: true},
+		{path: "/./", wellFormed: false, windowsSafe: true},
+		{path: "/../", wellFormed: false, windowsSafe: true},
+		// Bad middle, prefixed
+		{path: "a//", wellFormed: false, windowsSafe: true},
+		{path: "a/./", wellFormed: false, windowsSafe: true},
+		{path: "a/../", wellFormed: false, windowsSafe: true},
+		// Bad middle, suffixed
+		{path: "//a", wellFormed: false, windowsSafe: true},
+		{path: "/./a", wellFormed: false, windowsSafe: true},
+		{path: "/../a", wellFormed: false, windowsSafe: true},
+		// Bad middle, wrapped
+		{path: "a//a", wellFormed: false, windowsSafe: true},
+		{path: "a/./a", wellFormed: false, windowsSafe: true},
+		{path: "a/../a", wellFormed: false, windowsSafe: true},
+		// Names made of three dots are ordinary names, not traversal; they
+		// guard against false positives.
+		{path: "...", wellFormed: true, windowsSafe: true},
+		{path: ".../a", wellFormed: true, windowsSafe: true},
+		{path: "a/...", wellFormed: true, windowsSafe: true},
+		{path: "a/.../a", wellFormed: true, windowsSafe: true},
+		{path: ".../...", wellFormed: true, windowsSafe: true},
+	}
+
+	for _, tc := range cases {
+		subtestName := fmt.Sprintf("Path: \"%v\"", tc.path)
+		t.Run(subtestName, func(t *testing.T) {
+			gotWellFormed, gotWindowsSafe := checkName(tc.path)
+			if gotWellFormed == tc.wellFormed && gotWindowsSafe == tc.windowsSafe {
+				return
+			}
+			t.Errorf("\nwantOutput: checkName(\"%v\") wellFormed = %v, windowsSafe %v\ngot: checkName(\"%v\") wellFormed = %v, windowsSafe %v", tc.path, tc.wellFormed, tc.windowsSafe, tc.path, gotWellFormed, gotWindowsSafe)
+		})
+	}
+}
+
+func Test_canonicalizeLinkname(t *testing.T) {
+ // We're lying that this thing is absolute, but that's not relevant for tests.
+ anchor := turbopath.AbsoluteSystemPath(filepath.Join("path", "to", "anchor"))
+
+ tests := []struct {
+ name string
+ processedName turbopath.AnchoredSystemPath
+ linkname string
+ canonicalUnix string
+ canonicalWindows string
+ }{
+ {
+ name: "hello world",
+ processedName: turbopath.AnchoredSystemPath("source"),
+ linkname: "target",
+ canonicalUnix: "path/to/anchor/target",
+ canonicalWindows: "path\\to\\anchor\\target",
+ },
+ {
+ name: "Unix path subdirectory traversal",
+ processedName: turbopath.AnchoredUnixPath("child/source").ToSystemPath(),
+ linkname: "../sibling/target",
+ canonicalUnix: "path/to/anchor/sibling/target",
+ canonicalWindows: "path\\to\\anchor\\sibling\\target",
+ },
+ {
+ name: "Windows path subdirectory traversal",
+ processedName: turbopath.AnchoredUnixPath("child/source").ToSystemPath(),
+ linkname: "..\\sibling\\target",
+ canonicalUnix: "path/to/anchor/child/..\\sibling\\target",
+ canonicalWindows: "path\\to\\anchor\\sibling\\target",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ canonical := tt.canonicalUnix
+ if runtime.GOOS == "windows" {
+ canonical = tt.canonicalWindows
+ }
+ if got := canonicalizeLinkname(anchor, tt.processedName, tt.linkname); got != canonical {
+ t.Errorf("canonicalizeLinkname() = %v, want %v", got, canonical)
+ }
+ })
+ }
+}
+
+// Test_canonicalizeName verifies the path and the error returned by
+// canonicalizeName for a plain file name, a directory name with a trailing
+// slash, and a malformed name.
+func Test_canonicalizeName(t *testing.T) {
+	tests := []struct {
+		name     string
+		fileName string
+		want     turbopath.AnchoredSystemPath
+		wantErr  error
+	}{
+		{
+			name:     "hello world",
+			fileName: "test.txt",
+			want:     "test.txt",
+		},
+		{
+			name:     "directory",
+			fileName: "something/",
+			want:     "something",
+		},
+		{
+			name:     "malformed name",
+			fileName: "//",
+			want:     "",
+			wantErr:  errNameMalformed,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := canonicalizeName(tt.fileName)
+			// Compare the error unconditionally: errors.Is(nil, nil) is true,
+			// so cases that expect success pass, while an unexpected error in
+			// such a case is reported instead of being silently ignored.
+			if !errors.Is(err, tt.wantErr) {
+				t.Errorf("canonicalizeName() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("canonicalizeName() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCacheItem_Restore(t *testing.T) {
+ tests := []struct {
+ name string
+ tarFiles []tarFile
+ want []turbopath.AnchoredSystemPath
+ }{
+ {
+ name: "duplicate restores",
+ tarFiles: []tarFile{
+ {
+ Header: &tar.Header{
+ Name: "target",
+ Typeflag: tar.TypeReg,
+ Mode: 0644,
+ },
+ Body: "target",
+ },
+ {
+ Header: &tar.Header{
+ Name: "source",
+ Linkname: "target",
+ Typeflag: tar.TypeSymlink,
+ Mode: 0777,
+ },
+ },
+ {
+ Header: &tar.Header{
+ Name: "one/",
+ Typeflag: tar.TypeDir,
+ Mode: 0755,
+ },
+ },
+ {
+ Header: &tar.Header{
+ Name: "one/two/",
+ Typeflag: tar.TypeDir,
+ Mode: 0755,
+ },
+ },
+ },
+ want: turbopath.AnchoredUnixPathArray{"target", "source", "one", "one/two"}.ToSystemPathArray(),
+ },
+ }
+ for _, tt := range tests {
+ getTestFunc := func(compressed bool) func(t *testing.T) {
+ return func(t *testing.T) {
+ var archivePath turbopath.AbsoluteSystemPath
+ if compressed {
+ archivePath = compressTar(t, generateTar(t, tt.tarFiles))
+ } else {
+ archivePath = generateTar(t, tt.tarFiles)
+ }
+ anchor := generateAnchor(t)
+
+ cacheItem, err := Open(archivePath)
+ assert.NilError(t, err, "Open")
+
+ restoreOutput, restoreErr := cacheItem.Restore(anchor)
+ if !reflect.DeepEqual(restoreOutput, tt.want) {
+ t.Errorf("#1 CacheItem.Restore() = %v, want %v", restoreOutput, tt.want)
+ }
+ assert.NilError(t, restoreErr, "Restore #1")
+ assert.NilError(t, cacheItem.Close(), "Close")
+
+ cacheItem2, err2 := Open(archivePath)
+ assert.NilError(t, err2, "Open")
+
+ restoreOutput2, restoreErr2 := cacheItem2.Restore(anchor)
+ if !reflect.DeepEqual(restoreOutput2, tt.want) {
+ t.Errorf("#2 CacheItem.Restore() = %v, want %v", restoreOutput2, tt.want)
+ }
+ assert.NilError(t, restoreErr2, "Restore #2")
+ assert.NilError(t, cacheItem2.Close(), "Close")
+ }
+ }
+ t.Run(tt.name+"zst", getTestFunc(true))
+ t.Run(tt.name, getTestFunc(false))
+ }
+}