buchgr · ulrfa · May 28, 2020 · mostynb · Sep 17, 2020 · ulrfa
diff --git a/BUILD.bazel b/BUILD.bazel
@@ -22,6 +22,7 @@ go_library(
         "//config:go_default_library",
         "//server:go_default_library",
         "//utils/idle:go_default_library",
+        "//utils/metrics:go_default_library",
         "//utils/rlimit:go_default_library",
         "@com_github_abbot_go_http_auth//:go_default_library",
         "@com_github_grpc_ecosystem_go_grpc_prometheus//:go_default_library",

diff --git a/README.md b/README.md
@@ -223,6 +223,23 @@ host: localhost
 
 # If true, enable experimental remote asset API support:
 #experimental_remote_asset_api: true
+
+# Maps HTTP and gRPC request headers to Prometheus
+# labels. The Bazel client can send such headers with,
+# for example, --remote_header=os=ubuntu18-04. Not all
+# counters are affected.
+#metrics:
+#  categories:
+#    os:
+#      - rhel7
+#      - rhel8
+#      - ubuntu16-04
+#      - ubuntu18-04
+#    branch:
+#      - master
+#    user:
+#      - ci
+
 ```
 
 ## Docker

diff --git a/cache/cache.go b/cache/cache.go
@@ -21,6 +21,8 @@ const (
 	// used for HTTP when running with the --disable_http_ac_validation
 	// commandline flag.
 	RAW
+
+	UNKNOWN
 kind, hash, instance, err := parseRequestURL(r.URL.Path, h.validateAC) 
 if err != nil { 
 	http.Error(w, err.Error(), http.StatusBadRequest) 
 	h.logResponse(http.StatusBadRequest, r, cache.UNKNOWN) 
 kind, hash, instance, err := parseRequestURL(r.URL.Path, h.validateAC) 
 if err != nil { 
 	http.Error(w, err.Error(), http.StatusBadRequest) 
 	h.logResponse(http.StatusBadRequest, r, cache.UNKNOWN) 
 )
 
 func (e EntryKind) String() string {
@@ -30,7 +32,10 @@ func (e EntryKind) String() string {
 	if e == CAS {
 		return "cas"
 	}
-	return "raw"
+	if e == RAW {
+		return "raw"
+	}
+	return "unknown"
 }
 
 // Logger is designed to be satisfied by log.Logger.

diff --git a/config/config.go b/config/config.go
@@ -35,6 +35,11 @@ type HTTPBackendConfig struct {
 	BaseURL string `yaml:"url"`
 }
 
+// Metrics holds the prometheus settings read from the "metrics" key of the YAML config.
+type Metrics struct {
+	Categories map[string][]string `yaml:"categories"` // category name -> listed values; presumably the accepted label values - TODO confirm in utils/metrics
+}
+
 // Config holds the top-level configuration for bazel-remote.
 type Config struct {
 	Host                        string                    `yaml:"host"`
@@ -55,6 +60,7 @@ type Config struct {
 	DisableGRPCACDepsCheck      bool                      `yaml:"disable_grpc_ac_deps_check"`
 	EnableACKeyInstanceMangling bool                      `yaml:"enable_ac_key_instance_mangling"`
 	EnableEndpointMetrics       bool                      `yaml:"enable_endpoint_metrics"`
+	Metrics                     *Metrics                  `yaml:"metrics"`
 	ExperimentalRemoteAssetAPI  bool                      `yaml:"experimental_remote_asset_api"`
 	HTTPReadTimeout             time.Duration             `yaml:"http_read_timeout"`
 	HTTPWriteTimeout            time.Duration             `yaml:"http_write_timeout"`
@@ -73,6 +79,7 @@ func New(dir string, maxSize int, host string, port int, grpcPort int,
 	disableGRPCACDepsCheck bool,
 	enableACKeyInstanceMangling bool,
 	enableEndpointMetrics bool,
+	metrics *Metrics,
 	experimentalRemoteAssetAPI bool,
 	httpReadTimeout time.Duration,
 	httpWriteTimeout time.Duration) (*Config, error) {
@@ -95,6 +102,7 @@ func New(dir string, maxSize int, host string, port int, grpcPort int,
 		DisableGRPCACDepsCheck:      disableGRPCACDepsCheck,
 		EnableACKeyInstanceMangling: enableACKeyInstanceMangling,
 		EnableEndpointMetrics:       enableEndpointMetrics,
+		Metrics:                     metrics,
 		ExperimentalRemoteAssetAPI:  experimentalRemoteAssetAPI,
 		HTTPReadTimeout:             httpReadTimeout,
 		HTTPWriteTimeout:            httpWriteTimeout,

diff --git a/main.go b/main.go
@@ -22,6 +22,7 @@ import (
 	"github.com/buchgr/bazel-remote/config"
 	"github.com/buchgr/bazel-remote/server"
 	"github.com/buchgr/bazel-remote/utils/idle"
+	"github.com/buchgr/bazel-remote/utils/metrics"
 	"github.com/buchgr/bazel-remote/utils/rlimit"
 
 	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
@@ -283,6 +284,7 @@ func main() {
 				ctx.Bool("disable_grpc_ac_deps_check"),
 				ctx.Bool("enable_ac_key_instance_mangling"),
 				ctx.Bool("enable_endpoint_metrics"),
+				nil,
 				ctx.Bool("experimental_remote_asset_api"),
 				ctx.Duration("http_read_timeout"),
 				ctx.Duration("http_write_timeout"),
@@ -311,6 +313,7 @@ func main() {
 
 		accessLogger := log.New(os.Stdout, "", logFlags)
 		errorLogger := log.New(os.Stderr, "", logFlags)
+		metrics := metrics.NewMetrics(c.Metrics)
 
 		var proxyCache cache.Proxy
 		if c.GoogleCloudStorage != nil {
@@ -344,8 +347,7 @@ func main() {
 		}
 
 		validateAC := !c.DisableHTTPACValidation
-		h := server.NewHTTPCache(diskCache, accessLogger, errorLogger, validateAC, c.EnableACKeyInstanceMangling, gitCommit)
-
+		h := server.NewHTTPCache(diskCache, accessLogger, errorLogger, metrics, validateAC, c.EnableACKeyInstanceMangling, gitCommit)
 		var htpasswdSecrets auth.SecretProvider
 		cacheHandler := h.CacheHandler
 		if c.HtpasswdFile != "" {
@@ -444,7 +446,7 @@ func main() {
 					validateAC,
 					c.EnableACKeyInstanceMangling,
 					enableRemoteAssetAPI,
-					diskCache, accessLogger, errorLogger)
+					diskCache, accessLogger, errorLogger, metrics)
 				if err3 != nil {
 					log.Fatal(err3)
 				}

diff --git a/server/BUILD.bazel b/server/BUILD.bazel
@@ -18,6 +18,7 @@ go_library(
         "//cache:go_default_library",
         "//cache/disk:go_default_library",
         "//utils/idle:go_default_library",
+        "//utils/metrics:go_default_library",
         "@com_github_abbot_go_http_auth//:go_default_library",
         "@com_github_bazelbuild_remote_apis//build/bazel/remote/asset/v1:go_default_library",
         "@com_github_bazelbuild_remote_apis//build/bazel/remote/execution/v2:go_default_library",

diff --git a/server/grpc.go b/server/grpc.go
@@ -18,7 +18,7 @@ import (
 
 	"github.com/buchgr/bazel-remote/cache"
 	"github.com/buchgr/bazel-remote/cache/disk"
-
+	"github.com/buchgr/bazel-remote/utils/metrics"
 	_ "github.com/mostynb/go-grpc-compression/snappy" // Register snappy
 	_ "github.com/mostynb/go-grpc-compression/zstd"   // and zstd support.
 )
@@ -39,6 +39,7 @@ type grpcServer struct {
 	errorLogger  cache.Logger
 	depsCheck    bool
 	mangleACKeys bool
+	metrics      metrics.Metrics
 }
 
 // ListenAndServeGRPC creates a new gRPC server and listens on the given
@@ -48,27 +49,28 @@ func ListenAndServeGRPC(addr string, opts []grpc.ServerOption,
 	validateACDeps bool,
 	mangleACKeys bool,
 	enableRemoteAssetAPI bool,
-	c *disk.Cache, a cache.Logger, e cache.Logger) error {
+	c *disk.Cache, a cache.Logger, e cache.Logger, m metrics.Metrics) error {
 
 	listener, err := net.Listen("tcp", addr)
 	if err != nil {
 		return err
 	}
 
-	return serveGRPC(listener, opts, validateACDeps, mangleACKeys, enableRemoteAssetAPI, c, a, e)
+	return serveGRPC(listener, opts, validateACDeps, mangleACKeys, enableRemoteAssetAPI, c, a, e, m)
 }
 
 func serveGRPC(l net.Listener, opts []grpc.ServerOption,
 	validateACDepsCheck bool,
 	mangleACKeys bool,
 	enableRemoteAssetAPI bool,
-	c *disk.Cache, a cache.Logger, e cache.Logger) error {
+	c *disk.Cache, a cache.Logger, e cache.Logger, m metrics.Metrics) error {
 
 	srv := grpc.NewServer(opts...)
 	s := &grpcServer{
 		cache: c, accessLogger: a, errorLogger: e,
 		depsCheck:    validateACDepsCheck,
 		mangleACKeys: mangleACKeys,
+		metrics:      m,
 	}
 	pb.RegisterActionCacheServer(srv, s)
 	pb.RegisterCapabilitiesServer(srv, s)

diff --git a/server/grpc_ac.go b/server/grpc_ac.go
@@ -13,10 +13,12 @@ import (
 	pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
 	"github.com/golang/protobuf/proto"
 	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/metadata"
 	"google.golang.org/grpc/peer"
 	"google.golang.org/grpc/status"
 
 	"github.com/buchgr/bazel-remote/cache"
+	"github.com/buchgr/bazel-remote/utils/metrics"
 )
 
 var (
@@ -63,6 +65,7 @@ func (s *grpcServer) GetActionResult(ctx context.Context,
 		}
 		if rdr == nil || sizeBytes <= 0 {
 			s.accessLogger.Printf("%s %s %s", logPrefix, req.ActionDigest.Hash, "NOT FOUND")
+			s.incAcRequestMetrics(metrics.METHOD_GET, metrics.NOT_FOUND, ctx)
 			return nil, status.Error(codes.NotFound,
 				fmt.Sprintf("%s not found in AC", req.ActionDigest.Hash))
 		}
@@ -82,6 +85,7 @@ func (s *grpcServer) GetActionResult(ctx context.Context,
 		}
 
 		s.accessLogger.Printf("%s %s OK", logPrefix, req.ActionDigest.Hash)
+		s.incAcRequestMetrics(metrics.METHOD_GET, metrics.OK, ctx)
 		return result, nil
 	}
 
@@ -93,6 +97,7 @@ func (s *grpcServer) GetActionResult(ctx context.Context,
 
 	if result == nil {
 		s.accessLogger.Printf("%s %s NOT FOUND", logPrefix, req.ActionDigest.Hash)
+		s.incAcRequestMetrics(metrics.METHOD_GET, metrics.NOT_FOUND, ctx)
 		return nil, status.Error(codes.NotFound,
 			fmt.Sprintf("%s not found in AC", req.ActionDigest.Hash))
 	}
@@ -129,6 +134,7 @@ func (s *grpcServer) GetActionResult(ctx context.Context,
 	}
 
 	s.accessLogger.Printf("GRPC AC GET %s OK", req.ActionDigest.Hash)
+	s.incAcRequestMetrics(metrics.METHOD_GET, metrics.OK, ctx)
 
 	return result, nil
 }
@@ -290,6 +296,7 @@ func (s *grpcServer) UpdateActionResult(ctx context.Context,
 	}
 
 	s.accessLogger.Printf("GRPC AC PUT %s OK", req.ActionDigest.Hash)
+	s.incAcRequestMetrics(metrics.METHOD_PUT, metrics.OK, ctx)
 
 	// Trivia: the RE API wants us to return the ActionResult from the
 	// request, in order to follow this standard method style guide:
@@ -331,3 +338,8 @@ func addWorkerMetadataGRPC(ctx context.Context, ar *pb.ActionResult) {
 
 	ar.ExecutionMetadata.Worker = worker
 }
+
+func (s *grpcServer) incAcRequestMetrics(method metrics.Method, status metrics.Status, ctx context.Context) { // reports one completed gRPC AC request to s.metrics
+	headers, _ := metadata.FromIncomingContext(ctx) // ok result deliberately dropped; NOTE(review): headers is presumably nil when ctx carries no incoming metadata - confirm the Metrics implementation tolerates that
+	s.metrics.IncomingRequestCompleted(metrics.AC, method, status, headers, metrics.GRPC) // kind and protocol are fixed to AC and GRPC here; method and status come from the caller
+}
diff --git a/server/grpc_test.go b/server/grpc_test.go
@@ -73,6 +73,7 @@ func TestMain(m *testing.M) {
 
 	accessLogger := testutils.NewSilentLogger()
 	errorLogger := testutils.NewSilentLogger()
+	metrics := testutils.NewMetricsStub()
 
 	listener = bufconn.Listen(bufSize)
 
@@ -87,7 +88,7 @@ func TestMain(m *testing.M) {
 			validateAC,
 			mangleACKeys,
 			enableRemoteAssetAPI,
-			diskCache, accessLogger, errorLogger)
+			diskCache, accessLogger, errorLogger, metrics)
 		if err2 != nil {
 			fmt.Println(err2)
 			os.Exit(1)