Skip to content

Commit

Permalink
port over tar/xz decompressors (#2139)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman authored Sep 30, 2024
1 parent f2bc30a commit 0ad26ac
Show file tree
Hide file tree
Showing 7 changed files with 639 additions and 7 deletions.
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ require (
github.com/spf13/afero v1.11.0
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
github.com/ulikunitz/xz v0.5.12
github.com/wagoodman/go-partybus v0.0.0-20230516145632-8ccac152c651
github.com/wagoodman/go-presenter v0.0.0-20211015174752-f9c01afc824b
github.com/wagoodman/go-progress v0.0.0-20230925121702-07e42b3cdba0
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8
gorm.io/gorm v1.25.12
)

Expand Down Expand Up @@ -221,12 +223,10 @@ require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
github.com/vbatts/go-mtree v0.5.4 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/vifraa/gopom v1.0.0 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
github.com/zclconf/go-cty v1.14.0 // indirect
github.com/zyedidia/generic v1.2.2-0.20230320175451-4410d2372cb1 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1083,8 +1083,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678 h1:mchzmB1XO2pMaKFRqk/+MV3mgGG96aqaPXaMifQU47w=
golang.org/x/exp v0.0.0-20231108232855-2478ac86f678/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 h1:aAcj0Da7eBAtrTp03QXWvm88pSyOt+UgdZw2BFZ+lEw=
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
25 changes: 23 additions & 2 deletions internal/file/getter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (

"github.com/hashicorp/go-getter"
"github.com/hashicorp/go-getter/helper/url"
"github.com/spf13/afero"
"github.com/wagoodman/go-progress"

"github.com/anchore/clio"
"github.com/anchore/grype/internal/stringutil"
"github.com/anchore/stereoscope/pkg/file"
)

var (
Expand Down Expand Up @@ -111,14 +113,33 @@ func withProgress(monitor *progress.Manual) func(client *getter.Client) error {
}

// mapToGetterClientOptions assembles the go-getter client options for a download:
// one progress option per monitor, followed by a single option that installs the
// decompressor set (the go-getter limited defaults with the xz and tar.xz entries
// replaced by the implementations in this package).
func mapToGetterClientOptions(monitors []*progress.Manual) []getter.ClientOption {
	var opts []getter.ClientOption

	// TODO: this loop can be replaced once a generic `map` helper is available.
	for i := range monitors {
		opts = append(opts, withProgress(monitors[i]))
	}

	// derived from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress.go#L23-L63
	const noFilesLimit = 0
	sizeLimit := int64(5 * file.GB)

	decompressors := getter.LimitedDecompressors(noFilesLimit, sizeLimit)
	osFs := afero.NewOsFs()

	decompressors["xz"] = &xzDecompressor{
		FileSizeLimit: sizeLimit,
		Fs:            osFs,
	}

	// both tar.xz spellings share a single decompressor instance
	tarXz := &tarXzDecompressor{
		FilesLimit:    noFilesLimit,
		FileSizeLimit: sizeLimit,
		Fs:            osFs,
	}
	for _, ext := range []string{"tar.xz", "txz"} {
		decompressors[ext] = tarXz
	}

	return append(opts, getter.WithDecompressors(decompressors))
}

Expand Down
220 changes: 220 additions & 0 deletions internal/file/tar_xz_decompressor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
package file

import (
"archive/tar"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"

"github.com/spf13/afero"
"github.com/xi2/xz"
)

// Note: this is a copy of the TarXzDecompressor from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress_txz.go
// with the xz lib swapped out (for performance). A few adjustments were made:
// - refactored to use afero filesystem abstraction
// - fixed some linting issues

// tarXzDecompressor is an implementation of Decompressor that can
// decompress tar.xz files.
type tarXzDecompressor struct {
// FileSizeLimit limits the total size of all
// decompressed files.
//
// The zero value means no limit.
FileSizeLimit int64

// FilesLimit limits the number of files that are
// allowed to be decompressed.
//
// The zero value means no limit.
FilesLimit int

// Fs is the filesystem used to open the source archive and to create
// the extracted directories and files.
Fs afero.Fs
}

// Decompress unpacks the xz-compressed tar archive at src into dst using d.Fs.
//
// When dir is true, dst is treated as a directory: it is created if needed and
// the archive entries are extracted beneath it. When dir is false, only the
// parent directory of dst is created and the archive content is written to dst
// itself (untar rejects archives holding more than one file in that case).
//
// umask is passed to the package's mode helper when computing the permissions
// of created directories and files. d.FileSizeLimit and d.FilesLimit are
// forwarded to untar unchanged.
//
// Any failure to create the directory, open src, initialize the xz reader, or
// extract the archive is returned; the xz initialization error is wrapped so
// callers can still inspect the underlying cause.
func (d *tarXzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error {
	// If we're going into a directory we should make that first
	mkdir := dst
	if !dir {
		mkdir = filepath.Dir(dst)
	}
	if err := d.Fs.MkdirAll(mkdir, mode(0755, umask)); err != nil {
		return err
	}

	// File first
	f, err := d.Fs.Open(src)
	if err != nil {
		return err
	}
	defer f.Close()

	// xz compression is second
	txzR, err := xz.NewReader(f, 0)
	if err != nil {
		// wrap with %w (not %s) so the underlying error stays in the chain
		return fmt.Errorf("error opening an xz reader for %s: %w", src, err)
	}

	return untar(d.Fs, txzR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit)
}

// untar is a shared helper for untarring an archive. The reader should provide
// an uncompressed view of the tar archive.
//
// Parameters:
//   - fs: filesystem that receives the extracted directories and files.
//   - input: uncompressed tar stream.
//   - dst: destination directory (dir == true) or destination file path (dir == false).
//   - src: original archive path, used only in error messages.
//   - dir: when false the archive must contain exactly one regular file and no
//     directory entries; when true entries are extracted beneath dst.
//   - umask: passed to the package's mode/copyReader helpers when computing permissions.
//   - fileSizeLimit: maximum cumulative size (as declared in the tar headers) of
//     the processed entries; zero or negative disables the check.
//   - filesLimit: maximum number of archive entries that may be read (every
//     header counts, including entries that are subsequently skipped); zero or
//     negative disables the check.
//
// Symlinks, hard links and PAX extended headers are skipped. When dir is true,
// entry names containing a ".." path element are rejected. An archive from which
// no regular file was extracted is reported as empty.
func untar(fs afero.Fs, input io.Reader, dst, src string, dir bool, umask os.FileMode, fileSizeLimit int64, filesLimit int) error { // nolint:funlen,gocognit
	tarR := tar.NewReader(input)
	done := false
	var dirHdrs []*tar.Header
	now := time.Now()

	var (
		fileSize   int64
		filesCount int
	)

	for {
		hdr, err := tarR.Next()
		if err == io.EOF {
			if !done {
				// Empty archive
				return fmt.Errorf("empty archive: %s", src)
			}

			break
		}
		if err != nil {
			return err
		}

		// Count the entry only after its header has been read. Counting before
		// Next() treated the end-of-archive iteration as one more entry and
		// rejected archives holding exactly filesLimit entries.
		if filesLimit > 0 {
			filesCount++
			if filesCount > filesLimit {
				return fmt.Errorf("tar archive contains too many files: %d > %d", filesCount, filesLimit)
			}
		}

		switch hdr.Typeflag {
		case tar.TypeSymlink, tar.TypeLink:
			// to prevent any potential indirect traversal attacks
			continue
		case tar.TypeXGlobalHeader, tar.TypeXHeader:
			// don't unpack extended headers as files
			continue
		}

		path := dst
		if dir {
			// Disallow parent traversal
			if containsDotDot(hdr.Name) {
				return fmt.Errorf("entry contains '..': %s", hdr.Name)
			}

			path = filepath.Join(path, hdr.Name) // nolint:gosec // hdr.Name is checked above
		}

		fileInfo := hdr.FileInfo()

		// The limit is enforced against the sizes declared in the headers,
		// accumulated across all processed entries.
		fileSize += fileInfo.Size()

		if fileSizeLimit > 0 && fileSize > fileSizeLimit {
			return fmt.Errorf("tar archive larger than limit: %d", fileSizeLimit)
		}

		if fileInfo.IsDir() {
			if !dir {
				return fmt.Errorf("expected a single file: %s", src)
			}

			// A directory, just make the directory and continue unarchiving...
			if err := fs.MkdirAll(path, mode(0755, umask)); err != nil {
				return err
			}

			// Record the directory information so that we may set its attributes
			// after all files have been extracted
			dirHdrs = append(dirHdrs, hdr)

			continue
		}

		// There is no ordering guarantee that a file in a directory is
		// listed before the directory
		dstPath := filepath.Dir(path)

		// Check that the directory exists, otherwise create it
		if _, err := fs.Stat(dstPath); os.IsNotExist(err) {
			if err := fs.MkdirAll(dstPath, mode(0755, umask)); err != nil {
				return err
			}
		}

		// We have a file. If we already decoded, then it is an error
		if !dir && done {
			return fmt.Errorf("expected a single file, got multiple: %s", src)
		}

		// Mark that a file has been extracted so that any further file
		// encountered in single-file mode is reported as an error
		done = true

		// Size limit is tracked using the returned file info.
		err = copyReader(fs, path, tarR, fileInfo.Mode(), umask, 0)
		if err != nil {
			return err
		}

		// Set the access and modification time if valid, otherwise default to current time
		aTime, mTime := tarEntryTimes(hdr, now)
		if err := fs.Chtimes(path, aTime, mTime); err != nil {
			return err
		}
	}

	// Perform a final pass over extracted directories to update metadata
	for _, dirHdr := range dirHdrs {
		path := filepath.Join(dst, dirHdr.Name) // nolint:gosec // hdr.Name is checked above
		// Chmod the directory since they might be created before we know the mode flags
		if err := fs.Chmod(path, mode(dirHdr.FileInfo().Mode(), umask)); err != nil {
			return err
		}
		// Set the mtime/atime attributes since they would have been changed during extraction
		aTime, mTime := tarEntryTimes(dirHdr, now)
		if err := fs.Chtimes(path, aTime, mTime); err != nil {
			return err
		}
	}

	return nil
}

// tarEntryTimes returns the access and modification times to apply to an
// extracted entry: each header timestamp is used when it lies after the Unix
// epoch, otherwise fallback is used in its place.
func tarEntryTimes(hdr *tar.Header, fallback time.Time) (aTime, mTime time.Time) {
	aTime, mTime = fallback, fallback
	if hdr.AccessTime.Unix() > 0 {
		aTime = hdr.AccessTime
	}
	if hdr.ModTime.Unix() > 0 {
		mTime = hdr.ModTime
	}
	return aTime, mTime
}

// containsDotDot reports whether any path element of v is exactly "..".
// Elements are delimited by either '/' or '\'; a ".." that is merely part of
// a longer element (for example "a..b") does not count. The logic mirrors the
// helper of the same name in Go's net/http package.
func containsDotDot(v string) bool {
	// Only bother splitting when the substring is present at all.
	if strings.Contains(v, "..") {
		for _, segment := range strings.FieldsFunc(v, isSlashRune) {
			if segment == ".." {
				return true
			}
		}
	}
	return false
}

// isSlashRune reports whether r is a forward slash or a backslash.
func isSlashRune(r rune) bool {
	switch r {
	case '/', '\\':
		return true
	default:
		return false
	}
}
Loading

0 comments on commit 0ad26ac

Please sign in to comment.