From 9722ad7a41875e0f410dbb35e7bc66a1dd0785bc Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Fri, 8 Nov 2024 14:19:39 -0800 Subject: [PATCH 1/4] add metrics to s3 scan --- pkg/sources/s3/metrics.go | 97 +++++++++++++++++++++++++++++++++++++++ pkg/sources/s3/s3.go | 20 +++++++- 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 pkg/sources/s3/metrics.go diff --git a/pkg/sources/s3/metrics.go b/pkg/sources/s3/metrics.go new file mode 100644 index 000000000000..6ad8c7fce58f --- /dev/null +++ b/pkg/sources/s3/metrics.go @@ -0,0 +1,97 @@ +package s3 + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" +) + +// metricsCollector defines the interface for recording S3 scan metrics. +type metricsCollector interface { + // Object metrics. + + RecordObjectScanned(bucket string) + RecordObjectSkipped(bucket, reason string) + RecordObjectError(bucket string) + + // Role metrics. + + RecordRoleScanned(roleArn string) + RecordBucketForRole(roleArn string) +} + +type collector struct { + objectsScanned *prometheus.CounterVec + objectsSkipped *prometheus.CounterVec + objectsErrors *prometheus.CounterVec + rolesScanned *prometheus.GaugeVec + bucketsPerRole *prometheus.GaugeVec +} + +func newS3MetricsCollector() metricsCollector { + return &collector{ + objectsScanned: promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "objects_scanned_total", + Help: "Total number of S3 objects successfully scanned", + }, []string{"bucket"}), + + objectsSkipped: promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "objects_skipped_total", + Help: "Total number of S3 objects skipped during scan", + }, []string{"bucket", "reason"}), + + objectsErrors: promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "objects_errors_total", + Help: "Total number of errors encountered during S3 scan", + }, []string{"bucket"}), + + rolesScanned: promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "roles_scanned", + Help: "Number of AWS roles being scanned", + }, []string{"role_arn"}), + + bucketsPerRole: promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: common.MetricsNamespace, + Subsystem: common.MetricsSubsystem, + Name: "buckets_per_role", + Help: "Number of buckets accessible per AWS role", + }, []string{"role_arn"}), + } +} + +func (c *collector) RecordObjectScanned(bucket string) { + c.objectsScanned.WithLabelValues(bucket).Inc() +} + +func (c *collector) RecordObjectSkipped(bucket, reason string) { + c.objectsSkipped.WithLabelValues(bucket, reason).Inc() +} + +func (c *collector) RecordObjectError(bucket string) { + c.objectsErrors.WithLabelValues(bucket).Inc() +} + +const defaultRoleARN = "default" + +func (c *collector) RecordRoleScanned(roleArn string) { + if roleArn == "" { + roleArn = defaultRoleARN + } + c.rolesScanned.WithLabelValues(roleArn).Set(1) +} + +func (c *collector) RecordBucketForRole(roleArn string) { + if roleArn == "" { + roleArn = defaultRoleARN + } + c.bucketsPerRole.WithLabelValues(roleArn).Inc() +} diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index 7d2790c73afd..151f66283034 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -48,6 +48,7 @@ type Source struct { progressTracker *ProgressTracker sources.Progress + metricsCollector metricsCollector errorCount *sync.Map jobPool *errgroup.Group @@ -98,6 +99,7 @@ func (s *Source) Init( if err != nil { return err } + s.metricsCollector = newS3MetricsCollector() s.setMaxObjectSize(conn.GetMaxObjectSize()) @@ -282,6 +284,8 @@ func (s *Source) scanBuckets( bucketsToScanCount := len(bucketsToScan) for i := startIdx; i < bucketsToScanCount; i++ { + s.metricsCollector.RecordBucketForRole(role) + bucket := bucketsToScan[i] ctx := context.WithValue(ctx, "bucket", bucket) @@ -413,6 +417,7 @@ func (s *Source) pageChunker( for objIdx, obj := range metadata.page.Contents { if obj == nil { + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "nil_object") if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for nil object") } @@ -429,6 +434,7 @@ func (s *Source) pageChunker( // Skip GLACIER and GLACIER_IR objects. if obj.StorageClass == nil || strings.Contains(*obj.StorageClass, "GLACIER") { ctx.Logger().V(5).Info("Skipping object in storage class", "storage_class", *obj.StorageClass) + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "storage_class") if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for glacier object") } @@ -438,6 +444,7 @@ func (s *Source) pageChunker( // Ignore large files. if *obj.Size > s.maxObjectSize { ctx.Logger().V(5).Info("Skipping %d byte file (over maxObjectSize limit)") + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "size_limit") if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for large file") } @@ -447,6 +454,7 @@ func (s *Source) pageChunker( // File empty file. if *obj.Size == 0 { ctx.Logger().V(5).Info("Skipping empty file") + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "empty_file") if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for empty file") } @@ -456,6 +464,7 @@ func (s *Source) pageChunker( // Skip incompatible extensions. if common.SkipFile(*obj.Key) { ctx.Logger().V(5).Info("Skipping file with incompatible extension") + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "incompatible_extension") if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for incompatible file") } @@ -471,6 +480,7 @@ func (s *Source) pageChunker( if strings.HasSuffix(*obj.Key, "/") { ctx.Logger().V(5).Info("Skipping directory") + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "directory") return nil } @@ -496,8 +506,12 @@ func (s *Source) pageChunker( Key: obj.Key, }) if err != nil { - if !strings.Contains(err.Error(), "AccessDenied") { + if strings.Contains(err.Error(), "AccessDenied") { + ctx.Logger().Error(err, "could not get S3 object; access denied") + s.metricsCollector.RecordObjectSkipped(metadata.bucket, "access_denied") + } else { ctx.Logger().Error(err, "could not get S3 object") + s.metricsCollector.RecordObjectError(metadata.bucket) } // According to the documentation for GetObjectWithContext, // the response can be non-nil even if there was an error. @@ -551,6 +565,7 @@ func (s *Source) pageChunker( if err := handlers.HandleFile(ctx, res.Body, chunkSkel, sources.ChanReporter{Ch: chunksChan}); err != nil { ctx.Logger().Error(err, "error handling file") + s.metricsCollector.RecordObjectError(metadata.bucket) return nil } @@ -568,6 +583,7 @@ func (s *Source) pageChunker( if err := s.progressTracker.UpdateObjectProgress(ctx, objIdx, metadata.bucket, metadata.page.Contents); err != nil { ctx.Logger().Error(err, "could not update progress for scanned object") } + s.metricsCollector.RecordObjectScanned(metadata.bucket) return nil }) @@ -629,6 +645,8 @@ func (s *Source) visitRoles( } for _, role := range roles { + s.metricsCollector.RecordRoleScanned(role) + client, err := s.newClient(defaultAWSRegion, role) if err != nil { return fmt.Errorf("could not create s3 client: %w", err) From a72e25accfb4b58a6514fe4c34a4d6beaa344b26 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Fri, 8 Nov 2024 14:32:16 -0800 Subject: [PATCH 2/4] make collector a singleton --- pkg/sources/s3/metrics.go | 7 +++++++ pkg/sources/s3/s3.go | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/sources/s3/metrics.go b/pkg/sources/s3/metrics.go index 6ad8c7fce58f..9463a9a4daf3 100644 --- a/pkg/sources/s3/metrics.go +++ b/pkg/sources/s3/metrics.go @@ -29,6 +29,13 @@ type collector struct { bucketsPerRole *prometheus.GaugeVec } +var metricsInstance metricsCollector + +func init() { metricsInstance = newS3MetricsCollector() } + +// getMetricsCollector returns the singleton metrics collector instance.. +func getMetricsCollector() metricsCollector { return metricsInstance } + func newS3MetricsCollector() metricsCollector { return &collector{ objectsScanned: promauto.NewCounterVec(prometheus.CounterOpts{ diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index 151f66283034..3d5beba32557 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -99,7 +99,7 @@ func (s *Source) Init( if err != nil { return err } - s.metricsCollector = newS3MetricsCollector() + s.metricsCollector = getMetricsCollector() s.setMaxObjectSize(conn.GetMaxObjectSize()) From c3dd5f9906906c6c5bc99c22fb3e5ad8f485961c Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sat, 16 Nov 2024 12:29:07 -0800 Subject: [PATCH 3/4] address comments --- pkg/sources/s3/metrics.go | 9 ++------- pkg/sources/s3/s3.go | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pkg/sources/s3/metrics.go b/pkg/sources/s3/metrics.go index 9463a9a4daf3..3eb8b7c7bc00 100644 --- a/pkg/sources/s3/metrics.go +++ b/pkg/sources/s3/metrics.go @@ -31,13 +31,8 @@ type collector struct { var metricsInstance metricsCollector -func init() { metricsInstance = newS3MetricsCollector() } - -// getMetricsCollector returns the singleton metrics collector instance.. -func getMetricsCollector() metricsCollector { return metricsInstance } - -func newS3MetricsCollector() metricsCollector { - return &collector{ +func init() { + metricsInstance = &collector{ objectsScanned: promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: common.MetricsNamespace, Subsystem: common.MetricsSubsystem, diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index 3d5beba32557..a3a9f494388c 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -99,7 +99,7 @@ func (s *Source) Init( if err != nil { return err } - s.metricsCollector = getMetricsCollector() + s.metricsCollector = metricsInstance s.setMaxObjectSize(conn.GetMaxObjectSize()) From d31116c07d2056e7c30b51d818008bbee35caed0 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sat, 16 Nov 2024 12:31:32 -0800 Subject: [PATCH 4/4] fix --- pkg/sources/s3/s3.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index a3a9f494388c..90326505f53d 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -94,11 +94,7 @@ func (s *Source) Init( } s.conn = &conn - var err error - s.progressTracker, err = NewProgressTracker(ctx, conn.GetEnableResumption(), &s.Progress) - if err != nil { - return err - } + s.progressTracker = NewProgressTracker(ctx, conn.GetEnableResumption(), &s.Progress) s.metricsCollector = metricsInstance s.setMaxObjectSize(conn.GetMaxObjectSize()) @@ -412,7 +408,7 @@ func (s *Source) pageChunker( state processingState, chunksChan chan *sources.Chunk, ) { - s.progressTracker.Reset(ctx) + s.progressTracker.Reset() ctx = context.WithValues(ctx, "bucket", metadata.bucket, "page_number", metadata.pageNumber) for objIdx, obj := range metadata.page.Contents {