foxglove · sofuture · Sep 12, 2024 · Aug 19, 2024 · Sep 12, 2024 · Sep 12, 2024
diff --git a/charts/primary-site/templates/deployments/_inbox-container.tpl b/charts/primary-site/templates/deployments/_inbox-container.tpl
@@ -0,0 +1,137 @@
+{{- /*
+primary-site.inbox-container renders the pod "template:" stanza shared by the
+inbox-listener Deployment and the KEDA ScaledJob. Both include it with the root
+context and re-indent the result with nindent.
+*/ -}}
+{{- define "primary-site.inbox-container" }}
+template:
+  metadata:
+    labels:
+      app: inbox-listener
+      {{- range $key, $value := .Values.inboxListener.deployment.podLabels }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+    {{- if .Values.inboxListener.deployment.podAnnotations }}
+    annotations:
+      {{- range $key, $value := .Values.inboxListener.deployment.podAnnotations }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+    {{- end }}
+  spec:
+    {{- if .Values.inboxListener.autoscaling.enabled }}
+    ## With autoscaling enabled this template is used by a ScaledJob. Job pods must
+    ## set restartPolicy to Never or OnFailure; the pod default (Always) is rejected.
+    restartPolicy: Never
+    {{- end }}
+    volumes:
+      - name: cloud-credentials
+        secret:
+          secretName: gcp-cloud-credential
+          optional: true
+      {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+      - name: local-scratch
+        emptyDir:
+          sizeLimit: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+      {{- end }}
+    {{- if .Values.inboxListener.deployment.nodeSelectors }}
+    nodeSelector:
+      {{- range $key, $value := .Values.inboxListener.deployment.nodeSelectors }}
+      {{ $key }}: {{ $value | quote }}
+      {{- end }}
+    {{- end }}
+    {{- if .Values.inboxListener.deployment.serviceAccount.enabled }}
+    serviceAccount: inbox-listener
+    {{- end }}
+    containers:
+      - name: inbox-listener
+        image: us-central1-docker.pkg.dev/foxglove-images/images/inbox-listener:{{ .Chart.AppVersion }}
+        resources:
+          requests:
+            cpu: {{ .Values.inboxListener.deployment.resources.requests.cpu }}
+            memory: {{ .Values.inboxListener.deployment.resources.requests.memory }}
+            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+            ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+            {{- end }}
+          limits:
+            cpu: {{ .Values.inboxListener.deployment.resources.limits.cpu }}
+            memory: {{ .Values.inboxListener.deployment.resources.limits.memory }}
+            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+            ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
+            {{- end }}
+        volumeMounts:
+          - mountPath: /secrets
+            name: cloud-credentials
+          {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+          - mountPath: /local-scratch
+            name: local-scratch
+          {{- end }}
+        ports:
+          - name: metrics
+            containerPort: 6001
+        envFrom:
+          - secretRef:
+              name: cloud-credentials
+              optional: true
+          - secretRef:
+              name: foxglove-site-token
+              optional: true
+          {{- range $k := .Values.globals.secrets }}
+          - secretRef:
+              name: {{ $k }}
+          {{- end }}
+        env:
+          {{ with lookup "v1" "Secret" .Release.Namespace "gcp-cloud-credential" }}
+          ## The lookup is required here. The pod may have access to GCP through other means, but
+          ## the credentials in this env var take precedence, even if it's empty. An empty variable
+          ## essentially blocks GCP access.
+          - name: GOOGLE_APPLICATION_CREDENTIALS
+            value: /secrets/credentials.json
+          {{ end }}
+          - name: FOXGLOVE_API_URL
+            value: "{{ .Values.globals.foxgloveApiUrl }}"
+          {{- if .Values.globals.siteToken }}
+          - name: FOXGLOVE_SITE_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: foxglove-site
+                key: token
+                optional: false
+          {{- end }}
+          - name: MODE
+            value: self-managed
+          - name: INBOX_STORAGE_PROVIDER
+            value: "{{ .Values.globals.inbox.storageProvider }}"
+          - name: STORAGE_INBOX_BUCKET_NAME
+            value: "{{ .Values.globals.inbox.bucketName }}"
+          - name: LAKE_STORAGE_PROVIDER
+            value: "{{ .Values.globals.lake.storageProvider }}"
+          - name: STORAGE_LAKE_BUCKET_NAME
+            value: "{{ .Values.globals.lake.bucketName }}"
+          - name: STORAGE_AZURE_STORAGE_ACCOUNT_NAME
+            value: "{{ .Values.globals.azure.storageAccountName }}"
+          - name: STORAGE_AZURE_SERVICE_URL
+            value: "{{ .Values.globals.azure.serviceUrl }}"
+          - name: AWS_REGION
+            value: "{{ .Values.globals.aws.region }}"
+          - name: AWS_SDK_LOAD_CONFIG
+            value: "true"
+          - name: PROMETHEUS_METRICS_NAMESPACE
+            value: "{{ .Values.inboxListener.deployment.metrics.namespace }}"
+          - name: PROMETHEUS_METRICS_SUBSYSTEM
+            value: "{{ .Values.inboxListener.deployment.metrics.subsystem }}"
+          {{- range $item := .Values.inboxListener.deployment.env }}
+          - name: {{ $item.name }}
+            value: {{ $item.value | quote }}
+          {{- end }}
+          {{- if .Values.inboxListener.autoscaling.enabled }}
+          ## Quoted because env var values must be strings, whatever type the chart value has.
+          - name: MAX_WAIT_FOR_WORK
+            value: {{ .Values.inboxListener.autoscaling.maxWaitForWork | quote }}
+          {{- end }}
+          {{- if .Values.inboxListener.deployment.localScratch.enabled }}
+          - name: LOCAL_SCRATCH_ROOT
+            value: "/local-scratch"
+          - name: LOCAL_SCRATCH_CAPACITY_BYTES
+            value: "{{ .Values.inboxListener.deployment.localScratch.capacityBytes }}"
+          {{- end }}
+{{- end -}}
diff --git a/charts/primary-site/templates/deployments/inbox-listener.yaml b/charts/primary-site/templates/deployments/inbox-listener.yaml
@@ -1,3 +1,26 @@
+{{- if .Values.inboxListener.autoscaling.enabled }}
+apiVersion: keda.sh/v1alpha1
+kind: ScaledJob  # rendered instead of the Deployment below when autoscaling is enabled
+metadata:
+  name: inbox-listener-scaledjob
+spec:
+  minReplicaCount: {{ .Values.inboxListener.autoscaling.minReplicas }}
+  maxReplicaCount: {{ .Values.inboxListener.autoscaling.maxReplicas }}
+  successfulJobsHistoryLimit: 50
+  failedJobsHistoryLimit: 100
+  pollingInterval: 30
+  jobTargetRef:
+    parallelism: 1
+    activeDeadlineSeconds: 86400  # 24 hours
+    {{ include "primary-site.inbox-container" . | nindent 4 }}
+  triggers:
+    - type: metrics-api
+      metadata:
+        format: "prometheus"
+        targetValue: "2"
+        url: "http://site-controller.{{ .Release.Namespace }}.svc.cluster.local:6001/metrics"  # site-controller Service, metrics port
+        valueLocation: "{{ .Values.siteController.deployment.metrics.namespace | default "foxglove_data_platform" }}_{{ .Values.siteController.deployment.metrics.subsystem | default "site_controller" }}_unleased_pending_import_count"
+{{- else }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -14,121 +37,5 @@ spec:
       maxSurge: 25%
       maxUnavailable: 25%
     type: RollingUpdate
-  template:
-    metadata:
-      labels:
-        app: inbox-listener
-        {{- range $key, $value := .Values.inboxListener.deployment.podLabels }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-      annotations:
-        {{- range $key, $value := .Values.inboxListener.deployment.podAnnotations }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-    spec:
-      volumes:
-        - name: cloud-credentials
-          secret:
-            secretName: gcp-cloud-credential
-            optional: true
-        {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-        - name: local-scratch
-          emptyDir:
-            sizeLimit: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-        {{- end }}
-      containers:
-        - name: inbox-listener
-          image: us-central1-docker.pkg.dev/foxglove-images/images/inbox-listener:{{ .Chart.AppVersion }}
-          resources:
-            requests:
-              cpu: {{ .Values.inboxListener.deployment.resources.requests.cpu }}
-              memory: {{ .Values.inboxListener.deployment.resources.requests.memory }}
-              {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-              ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-              {{- end}}
-            limits:
-              cpu: {{ .Values.inboxListener.deployment.resources.limits.cpu }}
-              memory: {{ .Values.inboxListener.deployment.resources.limits.memory }}
-              {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-              ephemeral-storage: {{ .Values.inboxListener.deployment.localScratch.capacityBytes }}
-              {{- end}}
-          volumeMounts:
-            - mountPath: /secrets
-              name: cloud-credentials
-            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-            - mountPath: /local-scratch
-              name: local-scratch
-            {{- end }}
-          ports:
-            - name: metrics
-              containerPort: 6001
-          envFrom:
-            - secretRef:
-                name: cloud-credentials
-                optional: true
-            - secretRef:
-                name: foxglove-site-token
-                optional: true
-            {{- range $k := .Values.globals.secrets }}
-            - secretRef:
-                name: {{ $k }}
-            {{- end }}
-          env:
-            {{ with lookup "v1" "Secret" .Release.Namespace "gcp-cloud-credential" }}
-            ## The lookup is required here. The pod may have access to GCP through other means, but
-            ## the credentials in this env var take precedence, even if it's empty. An empty variable
-            ## essentially blocks GCP access.
-            - name: GOOGLE_APPLICATION_CREDENTIALS
-              value: /secrets/credentials.json
-            {{ end }}
-            - name: FOXGLOVE_API_URL
-              value: "{{ .Values.globals.foxgloveApiUrl }}"
-            {{- if .Values.globals.siteToken }}
-            - name: FOXGLOVE_SITE_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: foxglove-site
-                  key: token
-                  optional: false
-            {{- end }}
-            - name: MODE
-              value: self-managed
-            - name: INBOX_STORAGE_PROVIDER
-              value: "{{ .Values.globals.inbox.storageProvider }}"
-            - name: STORAGE_INBOX_BUCKET_NAME
-              value: "{{ .Values.globals.inbox.bucketName }}"
-            - name: LAKE_STORAGE_PROVIDER
-              value: "{{ .Values.globals.lake.storageProvider }}"
-            - name: STORAGE_LAKE_BUCKET_NAME
-              value: "{{ .Values.globals.lake.bucketName }}"
-            - name: STORAGE_AZURE_STORAGE_ACCOUNT_NAME
-              value: "{{ .Values.globals.azure.storageAccountName }}"
-            - name: STORAGE_AZURE_SERVICE_URL
-              value: "{{ .Values.globals.azure.serviceUrl }}"
-            - name: AWS_REGION
-              value: "{{ .Values.globals.aws.region }}"
-            - name: AWS_SDK_LOAD_CONFIG
-              value: "true"
-            - name: PROMETHEUS_METRICS_NAMESPACE
-              value: "{{ .Values.inboxListener.deployment.metrics.namespace }}"
-            - name: PROMETHEUS_METRICS_SUBSYSTEM
-              value: "{{ .Values.inboxListener.deployment.metrics.subsystem }}"
-            {{- range $item := .Values.inboxListener.deployment.env }}
-            - name: {{ $item.name }}
-              value: {{ $item.value | quote}}
-            {{- end }}
-            {{- if .Values.inboxListener.deployment.localScratch.enabled }}
-            - name: LOCAL_SCRATCH_ROOT
-              value: "/local-scratch"
-            - name: LOCAL_SCRATCH_CAPACITY_BYTES
-              value: "{{ .Values.inboxListener.deployment.localScratch.capacityBytes }}"
-            {{- end }}
-      {{- if .Values.inboxListener.deployment.serviceAccount.enabled }}
-      serviceAccount: inbox-listener
-      {{- end}}
-      {{- if .Values.inboxListener.deployment.nodeSelectors }}
-      nodeSelector:
-        {{- range $key, $value := .Values.inboxListener.deployment.nodeSelectors }}
-        {{ $key }}: {{ $value | quote }}
-        {{- end }}
-      {{- end}}
+  {{ include "primary-site.inbox-container" . | nindent 2 }}
+{{- end }}
diff --git a/charts/primary-site/templates/services/site-controller.yaml b/charts/primary-site/templates/services/site-controller.yaml
@@ -0,0 +1,21 @@
+## ClusterIP Service exposing the site-controller metrics port (6001) inside the cluster.
+## The inbox-listener ScaledJob trigger polls /metrics through this Service.
+apiVersion: v1
+kind: Service
+metadata:
+  name: site-controller
+  {{- if .Values.siteController.service.annotations }}
+  annotations:
+    {{- range $key, $value := .Values.siteController.service.annotations }}
+    {{ $key }}: {{ $value | quote }}
+    {{- end }}
+  {{- end }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: metrics
+      port: 6001
+      protocol: TCP
+      targetPort: 6001
+  selector:
+    app: site-controller
diff --git a/charts/primary-site/values.yaml b/charts/primary-site/values.yaml
@@ -97,6 +97,23 @@ inboxListener:
       ## annotations:
       ##   eks.amazonaws.com/role-arn: arn:aws:iam::xxxxxxxxxxxx:role/foxglove-inbox-listener-sa-role
 
+  # To enable the autoscaling built into this chart, you must install KEDA first:
+  # helm repo add kedacore https://kedacore.github.io/charts
+  # helm repo update
+  # helm install keda kedacore/keda --namespace keda --create-namespace
+  autoscaling:
+    enabled: false
+    # minReplicas can be raised if time to start processing incoming files is slower than desired
+    # 1 is a good default for almost all use-cases
+    minReplicas: 1
+    # maxReplicas can be raised if you constantly have a very large number of incoming files to process
+    # it should be set to a value that allows your site to process incoming files at peak load
+    maxReplicas: 10
+    # This value, supplied as a duration string (https://pkg.go.dev/time#ParseDuration), determines how long a pod
+    # will wait for new work items. It is unlikely that this value needs to be changed. It should only be set when
+    # using this autoscaling.
+    maxWaitForWork: "30s"
+
 streamService:
   service:
     annotations: {}
@@ -127,6 +144,8 @@ streamService:
       ##   eks.amazonaws.com/role-arn: arn:aws:iam::xxxxxxxxxxxx:role/foxglove-stream-service-sa-role
 
 siteController:
+  service:
+    annotations: {}
   deployment:
     resources:
       requests: