From 0ad26ac6a0f92cb4f2b0a5e6f588d74a4eb93c06 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Sep 2024 16:01:59 -0400 Subject: [PATCH] port over tar/xz decompressors (#2139) Signed-off-by: Alex Goodman --- go.mod | 6 +- go.sum | 4 +- internal/file/getter.go | 25 ++- internal/file/tar_xz_decompressor.go | 220 ++++++++++++++++++++++ internal/file/tar_xz_decompressor_test.go | 207 ++++++++++++++++++++ internal/file/xz_decompressor.go | 82 ++++++++ internal/file/xz_decompressor_test.go | 102 ++++++++++ 7 files changed, 639 insertions(+), 7 deletions(-) create mode 100644 internal/file/tar_xz_decompressor.go create mode 100644 internal/file/tar_xz_decompressor_test.go create mode 100644 internal/file/xz_decompressor.go create mode 100644 internal/file/xz_decompressor_test.go diff --git a/go.mod b/go.mod index d5f8333099d..1abdd2f7a44 100644 --- a/go.mod +++ b/go.mod @@ -53,10 +53,12 @@ require ( github.com/spf13/afero v1.11.0 github.com/spf13/cobra v1.8.1 github.com/stretchr/testify v1.9.0 + github.com/ulikunitz/xz v0.5.12 github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651 github.com/wagoodman/go-presenter v0.0.0-20211015174752-f9c01afc824b github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0 - golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 + github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 + golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 gorm.io/gorm v1.25.12 ) @@ -221,12 +223,10 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect - github.com/ulikunitz/xz v0.5.12 // indirect github.com/vbatts/go-mtree v0.5.4 // indirect github.com/vbatts/tar-split v0.11.3 // indirect github.com/vifraa/gopom v1.0.0 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect - github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect github.com/zclconf/go-cty v1.14.0 // indirect github.com/zyedidia/generic v1.2.2-0.20230320175451-4410d2372cb1 // indirect diff --git a/go.sum b/go.sum index d85703ecf1a..cae5d2a72e5 100644 --- a/go.sum +++ b/go.sum @@ -1083,8 +1083,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 h1:mchzmB1XO2pMaKFRqk/+MV3mgGG96aqaPXaMifQU47w= -golang.org/x/exp v0.0.0-20231108232855-2478ac86f678/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= +golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw= +golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/internal/file/getter.go b/internal/file/getter.go index 12d765f5568..343d9dde1ad 100644 --- a/internal/file/getter.go +++ b/internal/file/getter.go @@ -7,10 +7,12 @@ import ( "github.com/hashicorp/go-getter" "github.com/hashicorp/go-getter/helper/url" + "github.com/spf13/afero" "github.com/wagoodman/go-progress" "github.com/anchore/clio" "github.com/anchore/grype/internal/stringutil" + "github.com/anchore/stereoscope/pkg/file" ) var ( @@ -111,14 +113,33 @@ func withProgress(monitor *progress.Manual) func(client *getter.Client) error { } func mapToGetterClientOptions(monitors []*progress.Manual) []getter.ClientOption { - // TODO: This function is no longer needed once a generic `map` method is available. - var result []getter.ClientOption for _, monitor := range monitors { result = append(result, withProgress(monitor)) } + // derived from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress.go#L23-L63 + fileSizeLimit := int64(5 * file.GB) + + dec := getter.LimitedDecompressors(0, fileSizeLimit) + fs := afero.NewOsFs() + xzd := &xzDecompressor{ + FileSizeLimit: fileSizeLimit, + Fs: fs, + } + txzd := &tarXzDecompressor{ + FilesLimit: 0, + FileSizeLimit: fileSizeLimit, + Fs: fs, + } + + dec["xz"] = xzd + dec["tar.xz"] = txzd + dec["txz"] = txzd + + result = append(result, getter.WithDecompressors(dec)) + return result } diff --git a/internal/file/tar_xz_decompressor.go b/internal/file/tar_xz_decompressor.go new file mode 100644 index 00000000000..92fb1661522 --- /dev/null +++ b/internal/file/tar_xz_decompressor.go @@ -0,0 +1,220 @@ +package file + +import ( + "archive/tar" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "github.com/spf13/afero" + "github.com/xi2/xz" +) + +// Note: this is a copy of the TarXzDecompressor from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress_txz.go +// with the xz lib swapped out (for performance). A few adjustments were made: +// - refactored to use afero filesystem abstraction +// - fixed some linting issues + +// TarXzDecompressor is an implementation of Decompressor that can +// decompress tar.xz files. +type tarXzDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. + FilesLimit int + + Fs afero.Fs +} + +func (d *tarXzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { + // If we're going into a directory we should make that first + mkdir := dst + if !dir { + mkdir = filepath.Dir(dst) + } + if err := d.Fs.MkdirAll(mkdir, mode(0755, umask)); err != nil { + return err + } + + // File first + f, err := d.Fs.Open(src) + if err != nil { + return err + } + defer f.Close() + + // xz compression is second + txzR, err := xz.NewReader(f, 0) + if err != nil { + return fmt.Errorf("error opening an xz reader for %s: %s", src, err) + } + + return untar(d.Fs, txzR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) +} + +// untar is a shared helper for untarring an archive. The reader should provide +// an uncompressed view of the tar archive. +func untar(fs afero.Fs, input io.Reader, dst, src string, dir bool, umask os.FileMode, fileSizeLimit int64, filesLimit int) error { // nolint:funlen,gocognit + tarR := tar.NewReader(input) + done := false + dirHdrs := []*tar.Header{} + now := time.Now() + + var ( + fileSize int64 + filesCount int + ) + + for { + if filesLimit > 0 { + filesCount++ + if filesCount > filesLimit { + return fmt.Errorf("tar archive contains too many files: %d > %d", filesCount, filesLimit) + } + } + + hdr, err := tarR.Next() + if err == io.EOF { + if !done { + // Empty archive + return fmt.Errorf("empty archive: %s", src) + } + + break + } + if err != nil { + return err + } + + switch hdr.Typeflag { + case tar.TypeSymlink, tar.TypeLink: + // to prevent any potential indirect traversal attacks + continue + case tar.TypeXGlobalHeader, tar.TypeXHeader: + // don't unpack extended headers as files + continue + } + + path := dst + if dir { + // Disallow parent traversal + if containsDotDot(hdr.Name) { + return fmt.Errorf("entry contains '..': %s", hdr.Name) + } + + path = filepath.Join(path, hdr.Name) // nolint:gosec // hdr.Name is checked above + } + + fileInfo := hdr.FileInfo() + + fileSize += fileInfo.Size() + + if fileSizeLimit > 0 && fileSize > fileSizeLimit { + return fmt.Errorf("tar archive larger than limit: %d", fileSizeLimit) + } + + if fileInfo.IsDir() { + if !dir { + return fmt.Errorf("expected a single file: %s", src) + } + + // A directory, just make the directory and continue unarchiving... + if err := fs.MkdirAll(path, mode(0755, umask)); err != nil { + return err + } + + // Record the directory information so that we may set its attributes + // after all files have been extracted + dirHdrs = append(dirHdrs, hdr) + + continue + } + // There is no ordering guarantee that a file in a directory is + // listed before the directory + dstPath := filepath.Dir(path) + + // Check that the directory exists, otherwise create it + if _, err := fs.Stat(dstPath); os.IsNotExist(err) { + if err := fs.MkdirAll(dstPath, mode(0755, umask)); err != nil { + return err + } + } + + // We have a file. If we already decoded, then it is an error + if !dir && done { + return fmt.Errorf("expected a single file, got multiple: %s", src) + } + + // Mark that we're done so future in single file mode errors + done = true + + // Size limit is tracked using the returned file info. + err = copyReader(fs, path, tarR, hdr.FileInfo().Mode(), umask, 0) + if err != nil { + return err + } + + // Set the access and modification time if valid, otherwise default to current time + aTime := now + mTime := now + if hdr.AccessTime.Unix() > 0 { + aTime = hdr.AccessTime + } + if hdr.ModTime.Unix() > 0 { + mTime = hdr.ModTime + } + if err := fs.Chtimes(path, aTime, mTime); err != nil { + return err + } + } + + // Perform a final pass over extracted directories to update metadata + for _, dirHdr := range dirHdrs { + path := filepath.Join(dst, dirHdr.Name) // nolint:gosec // hdr.Name is checked above + // Chmod the directory since they might be created before we know the mode flags + if err := fs.Chmod(path, mode(dirHdr.FileInfo().Mode(), umask)); err != nil { + return err + } + // Set the mtime/atime attributes since they would have been changed during extraction + aTime := now + mTime := now + if dirHdr.AccessTime.Unix() > 0 { + aTime = dirHdr.AccessTime + } + if dirHdr.ModTime.Unix() > 0 { + mTime = dirHdr.ModTime + } + if err := fs.Chtimes(path, aTime, mTime); err != nil { + return err + } + } + + return nil +} + +// containsDotDot checks if the filepath value v contains a ".." entry. +// This will check filepath components by splitting along / or \. This +// function is copied directly from the Go net/http implementation. +func containsDotDot(v string) bool { + if !strings.Contains(v, "..") { + return false + } + for _, ent := range strings.FieldsFunc(v, isSlashRune) { + if ent == ".." { + return true + } + } + return false +} + +func isSlashRune(r rune) bool { return r == '/' || r == '\\' } diff --git a/internal/file/tar_xz_decompressor_test.go b/internal/file/tar_xz_decompressor_test.go new file mode 100644 index 00000000000..69e863a3c34 --- /dev/null +++ b/internal/file/tar_xz_decompressor_test.go @@ -0,0 +1,207 @@ +package file + +import ( + "archive/tar" + "bytes" + "path/filepath" + "testing" + + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/ulikunitz/xz" +) + +func TestTarXzDecompressor_Decompress(t *testing.T) { + files := map[string]string{ + "file1.txt": "This is file 1.", + "file2.txt": "This is file 2.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.NoError(t, err) + + for name, content := range files { + data, err := afero.ReadFile(fs, filepath.Join(dstDir, name)) + require.NoError(t, err) + assert.Equal(t, content, string(data)) + } +} + +func TestTarXzDecompressor_DecompressWithNestedDirs(t *testing.T) { + files := map[string]string{ + "file1.txt": "This is file 1.", + "dir1/file2.txt": "This is file 2 in dir1.", + "dir1/dir2/file3.txt": "This is file 3 in dir1/dir2.", + "dir1/dir2/dir3/file4.txt": "This is file 4 in dir1/dir2/dir3.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.NoError(t, err) + + for name, content := range files { + data, err := afero.ReadFile(fs, filepath.Join(dstDir, name)) + require.NoError(t, err) + assert.Equal(t, content, string(data)) + } +} + +func TestTarXzDecompressor_FileSizeLimit(t *testing.T) { + files := map[string]string{ + "file1.txt": "This is file 1.", + "file2.txt": "This is file 2.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + FileSizeLimit: int64(10), // setting a small file size limit + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.Error(t, err) + assert.Contains(t, err.Error(), "tar archive larger than limit") +} + +func TestTarXzDecompressor_FilesLimit(t *testing.T) { + files := map[string]string{ + "file1.txt": "This is file 1.", + "file2.txt": "This is file 2.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + FilesLimit: 1, // setting a limit of 1 file + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.Error(t, err) + assert.Contains(t, err.Error(), "tar archive contains too many files") +} + +func TestTarXzDecompressor_DecompressSingleFile(t *testing.T) { + files := map[string]string{ + "file1.txt": "This is file 1.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstFile := filepath.Join(tmpDir, "single_file.txt") + + decompressor := &tarXzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstFile, srcFile, false, 0000) + require.NoError(t, err) + + data, err := afero.ReadFile(fs, dstFile) + require.NoError(t, err) + assert.Equal(t, files["file1.txt"], string(data)) +} + +func TestTarXzDecompressor_EmptyArchive(t *testing.T) { + files := map[string]string{} + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.Error(t, err) + assert.Contains(t, err.Error(), "empty archive") +} + +func TestTarXzDecompressor_PathTraversal(t *testing.T) { + files := map[string]string{ + "../traversal_file.txt": "This file should not be extracted.", + } + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createTarXzFromFiles(t, fs, files) + dstDir := filepath.Join(tmpDir, "decompressed") + + decompressor := &tarXzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstDir, srcFile, true, 0000) + require.Error(t, err) + assert.Contains(t, err.Error(), "entry contains '..'") +} + +func createTarXzFromFiles(t *testing.T, fs afero.Fs, files map[string]string) (string, string) { + t.Helper() + + tmpDir, err := afero.TempDir(fs, "", "tar_xz_decompressor_test") + require.NoError(t, err) + srcFile := filepath.Join(tmpDir, "src_file.tar.xz") + + var buf bytes.Buffer + xzWriter, err := xz.NewWriter(&buf) + require.NoError(t, err) + + tarWriter := tar.NewWriter(xzWriter) + + for name, content := range files { + dir := filepath.Dir(name) + if dir != "." { + hdr := &tar.Header{ + Name: dir + "/", + Mode: 0755, + Typeflag: tar.TypeDir, + } + err := tarWriter.WriteHeader(hdr) + require.NoError(t, err) + } + + hdr := &tar.Header{ + Name: name, + Mode: 0600, + Size: int64(len(content)), + } + err := tarWriter.WriteHeader(hdr) + require.NoError(t, err) + + _, err = tarWriter.Write([]byte(content)) + require.NoError(t, err) + } + + err = tarWriter.Close() + require.NoError(t, err) + + err = xzWriter.Close() + require.NoError(t, err) + + err = afero.WriteFile(fs, srcFile, buf.Bytes(), 0644) + require.NoError(t, err) + + return srcFile, tmpDir +} diff --git a/internal/file/xz_decompressor.go b/internal/file/xz_decompressor.go new file mode 100644 index 00000000000..c76df201e35 --- /dev/null +++ b/internal/file/xz_decompressor.go @@ -0,0 +1,82 @@ +package file + +import ( + "fmt" + "io" + "os" + "path/filepath" + + "github.com/spf13/afero" + "github.com/xi2/xz" +) + +// Note: this is a copy of the XzDecompressor from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress_xz.go +// with the xz lib swapped out (for performance). A few adjustments were made: +// - refactored to use afero filesystem abstraction +// - fixed some linting issues + +// xzDecompressor is an implementation of Decompressor that can decompress xz files. +type xzDecompressor struct { + // FileSizeLimit limits the size of a decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 + + Fs afero.Fs +} + +func (d *xzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { + // Directory isn't supported at all + if dir { + return fmt.Errorf("xz-compressed files can only unarchive to a single file") + } + + // If we're going into a directory we should make that first + if err := d.Fs.MkdirAll(filepath.Dir(dst), mode(0755, umask)); err != nil { + return err + } + + // File first + f, err := d.Fs.Open(src) + if err != nil { + return err + } + defer f.Close() + + // xz compression is second + xzR, err := xz.NewReader(f, 0) + if err != nil { + return err + } + + // Copy it out, potentially using a file size limit. + return copyReader(d.Fs, dst, xzR, 0622, umask, d.FileSizeLimit) +} + +// copyReader copies from an io.Reader into a file, using umask to create the dst file +func copyReader(fs afero.Fs, dst string, src io.Reader, fmode, umask os.FileMode, fileSizeLimit int64) error { + dstF, err := fs.OpenFile(dst, os.O_RDWR|os.O_CREATE|os.O_TRUNC, fmode) + if err != nil { + return err + } + defer dstF.Close() + + if fileSizeLimit > 0 { + src = io.LimitReader(src, fileSizeLimit) + } + + _, err = io.Copy(dstF, src) + if err != nil { + return err + } + + // Explicitly chmod; the process umask is unconditionally applied otherwise. + // We'll mask the mode with our own umask, but that may be different than + // the process umask + return fs.Chmod(dst, mode(fmode, umask)) +} + +// mode returns the file mode masked by the umask +func mode(mode, umask os.FileMode) os.FileMode { + return mode & ^umask +} diff --git a/internal/file/xz_decompressor_test.go b/internal/file/xz_decompressor_test.go new file mode 100644 index 00000000000..a696eff0598 --- /dev/null +++ b/internal/file/xz_decompressor_test.go @@ -0,0 +1,102 @@ +package file + +import ( + "os" + "path/filepath" + "testing" + + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/ulikunitz/xz" +) + +func TestXzDecompressor_Decompress(t *testing.T) { + content := "This is a test for xz decompression." + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createXZFromString(t, fs, content) + dstFile := filepath.Join(tmpDir, "dst_file.txt") + + decompressor := &xzDecompressor{ + Fs: fs, + } + + err := decompressor.Decompress(dstFile, srcFile, false, 0000) + require.NoError(t, err) + + data, err := afero.ReadFile(fs, dstFile) + require.NoError(t, err) + assert.Equal(t, content, string(data)) +} + +func TestXzDecompressor_FileSizeLimit(t *testing.T) { + content := "This is a test for xz decompression with file size limit." + + fs := afero.NewMemMapFs() + srcFile, tmpDir := createXZFromString(t, fs, content) + dstFile := filepath.Join(tmpDir, "dst_file.txt") + + fileSizeLimit := int64(10) + + decompressor := &xzDecompressor{ + FileSizeLimit: fileSizeLimit, + Fs: fs, + } + + err := decompressor.Decompress(dstFile, srcFile, false, 0000) + require.NoError(t, err) + + data, err := afero.ReadFile(fs, dstFile) + require.NoError(t, err) + assert.Equal(t, content[:fileSizeLimit], string(data)) +} + +func TestCopyReader(t *testing.T) { + content := "This is the content for testing copyReader." + + fs := afero.NewMemMapFs() + + tmpDir := t.TempDir() + srcFile := filepath.Join(tmpDir, "src_file.txt") + err := afero.WriteFile(fs, srcFile, []byte(content), 0644) + require.NoError(t, err) + + srcF, err := fs.Open(srcFile) + require.NoError(t, err) + defer srcF.Close() + + dstFile := filepath.Join(tmpDir, "dst_file.txt") + + err = copyReader(fs, dstFile, srcF, 0644, 0000, 0) + require.NoError(t, err) + + info, err := fs.Stat(dstFile) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0644), info.Mode().Perm()) + + data, err := afero.ReadFile(fs, dstFile) + assert.NoError(t, err) + assert.Equal(t, content, string(data)) +} + +func createXZFromString(t *testing.T, fs afero.Fs, content string) (string, string) { + t.Helper() + + tmpDir, err := afero.TempDir(fs, "", "xz_decompressor_test") + require.NoError(t, err) + srcFile := filepath.Join(tmpDir, "src_file.xz") + + f, err := fs.Create(srcFile) + require.NoError(t, err) + defer f.Close() + + xzW, err := xz.NewWriter(f) + require.NoError(t, err) + defer xzW.Close() + + _, err = xzW.Write([]byte(content)) + assert.NoError(t, err) + + return srcFile, tmpDir +}