From 80e33f92f4c91bc0829db7a3cd8265404a01e597 Mon Sep 17 00:00:00 2001 From: Ruben Vargas Date: Sat, 21 Sep 2024 00:56:15 -0600 Subject: [PATCH] Add support for memberlist bind network configuration Signed-off-by: Ruben Vargas --- .chloggen/fix_hashring.yaml | 19 +++++++ apis/tempo/v1alpha1/tempostack_types.go | 22 +++++++ .../tempo-operator.clusterserviceversion.yaml | 11 +++- .../tempo.grafana.com_tempostacks.yaml | 10 ++++ .../tempo-operator.clusterserviceversion.yaml | 11 +++- .../tempo.grafana.com_tempostacks.yaml | 10 ++++ .../bases/tempo.grafana.com_tempostacks.yaml | 10 ++++ .../tempo-operator.clusterserviceversion.yaml | 9 +++ .../tempo-operator.clusterserviceversion.yaml | 9 +++ internal/manifests/config/build.go | 20 ++++++- internal/manifests/config/options.go | 5 +- internal/manifests/config/tempo-config.yaml | 6 ++ internal/manifests/distributor/distributor.go | 1 + internal/manifests/ingester/ingester.go | 1 + internal/manifests/memberlist/gossip.go | 57 ++++++++++++++++++- 15 files changed, 194 insertions(+), 7 deletions(-) create mode 100755 .chloggen/fix_hashring.yaml diff --git a/.chloggen/fix_hashring.yaml b/.chloggen/fix_hashring.yaml new file mode 100755 index 000000000..10899d434 --- /dev/null +++ b/.chloggen/fix_hashring.yaml @@ -0,0 +1,19 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. tempostack, tempomonolithic, github action) +component: tempostack + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add support for memberlist bind network configuration + +# One or more tracking issues related to the change +issues: [1060] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. 
+# Use pipe (|) for multiline entries. +subtext: | + Adds support to configure the memberlist bind_addr field using the pod network IP range instead of the default private network range. + In managed Kubernetes/OpenShift cluster environments, as well as in special on-prem setups, the private IP ranges might not be available. + With this change, the TempoStack administrator can choose the pod IP assigned by the cluster's pod network as the bind address. diff --git a/apis/tempo/v1alpha1/tempostack_types.go b/apis/tempo/v1alpha1/tempostack_types.go index e868b207d..f71123172 100644 --- a/apis/tempo/v1alpha1/tempostack_types.go +++ b/apis/tempo/v1alpha1/tempostack_types.go @@ -303,6 +303,18 @@ var AllStatusConditions = []ConditionStatus{ConditionReady, ConditionFailed, Con // ConditionReason defines possible reasons for each condition. type ConditionReason string +// InstanceAddrType defines the type of pod network to use for advertising IPs to the ring. +// +// +kubebuilder:validation:Enum=default;podIP +type InstanceAddrType string + +const ( + // InstanceAddrDefault selects the first address from any private network interface (RFC 1918 and RFC 6598). + InstanceAddrDefault InstanceAddrType = "default" + // InstanceAddrPodIP selects the public pod IP from the cluster's pod network. + InstanceAddrPodIP InstanceAddrType = "podIP" +) + const ( // ReasonReady defines a healthy tempo instance. ReasonReady ConditionReason = "Ready" @@ -423,6 +435,16 @@ type MemberListSpec struct { // +kubebuilder:validation:Optional // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch",displayName="Enable IPv6" EnableIPv6 *bool `json:"enableIPv6,omitempty"` + + // InstanceAddrType defines the type of address to use to advertise to the ring. + // Defaults to the first address from any private network interfaces of the current pod. 
+ // Alternatively the public pod IP can be used in case private networks (RFC 1918 and RFC 6598) + // are not available. + // + // +optional + // +kubebuilder:validation:optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:select:default","urn:alm:descriptor:com.tectonic.ui:select:podIP"},displayName="Instance Address" + InstanceAddrType InstanceAddrType `json:"instanceAddrType,omitempty"` } // HashRingSpec defines the hash ring configuration. diff --git a/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml b/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml index e597be8a4..c90de1319 100644 --- a/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml +++ b/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml @@ -74,7 +74,7 @@ metadata: capabilities: Deep Insights categories: Logging & Tracing,Monitoring containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.13.0 - createdAt: "2024-10-10T15:57:46Z" + createdAt: "2024-10-12T18:04:08Z" description: Create and manage deployments of Tempo, a high-scale distributed tracing backend. operatorframework.io/cluster-monitoring: "true" @@ -584,6 +584,15 @@ spec: path: hashRing.memberlist.enableIPv6 x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: InstanceAddrType defines the type of address to use to advertise + to the ring. Defaults to the first address from any private network interfaces + of the current pod. Alternatively the public pod IP can be used in case + private networks (RFC 1918 and RFC 6598) are not available. + displayName: Instance Address + path: hashRing.memberlist.instanceAddrType + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:default + - urn:alm:descriptor:com.tectonic.ui:select:podIP - description: Images defines the image for each container. 
displayName: Container Images path: images diff --git a/bundle/community/manifests/tempo.grafana.com_tempostacks.yaml b/bundle/community/manifests/tempo.grafana.com_tempostacks.yaml index 6ec9eaffb..ad90ab922 100644 --- a/bundle/community/manifests/tempo.grafana.com_tempostacks.yaml +++ b/bundle/community/manifests/tempo.grafana.com_tempostacks.yaml @@ -79,6 +79,16 @@ spec: description: EnableIPv6 enables IPv6 support for the memberlist based hash ring. type: boolean + instanceAddrType: + description: |- + InstanceAddrType defines the type of address to use to advertise to the ring. + Defaults to the first address from any private network interfaces of the current pod. + Alternatively the public pod IP can be used in case private networks (RFC 1918 and RFC 6598) + are not available. + enum: + - default + - podIP + type: string type: object type: object images: diff --git a/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml index 172fe9a94..504d78028 100644 --- a/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml +++ b/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml @@ -74,7 +74,7 @@ metadata: capabilities: Deep Insights categories: Logging & Tracing,Monitoring containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.13.0 - createdAt: "2024-10-10T15:57:44Z" + createdAt: "2024-10-12T18:04:06Z" description: Create and manage deployments of Tempo, a high-scale distributed tracing backend. operatorframework.io/cluster-monitoring: "true" @@ -584,6 +584,15 @@ spec: path: hashRing.memberlist.enableIPv6 x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: InstanceAddrType defines the type of address to use to advertise + to the ring. Defaults to the first address from any private network interfaces + of the current pod. 
Alternatively the public pod IP can be used in case + private networks (RFC 1918 and RFC 6598) are not available. + displayName: Instance Address + path: hashRing.memberlist.instanceAddrType + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:default + - urn:alm:descriptor:com.tectonic.ui:select:podIP - description: Images defines the image for each container. displayName: Container Images path: images diff --git a/bundle/openshift/manifests/tempo.grafana.com_tempostacks.yaml b/bundle/openshift/manifests/tempo.grafana.com_tempostacks.yaml index 6ec9eaffb..ad90ab922 100644 --- a/bundle/openshift/manifests/tempo.grafana.com_tempostacks.yaml +++ b/bundle/openshift/manifests/tempo.grafana.com_tempostacks.yaml @@ -79,6 +79,16 @@ spec: description: EnableIPv6 enables IPv6 support for the memberlist based hash ring. type: boolean + instanceAddrType: + description: |- + InstanceAddrType defines the type of address to use to advertise to the ring. + Defaults to the first address from any private network interfaces of the current pod. + Alternatively the public pod IP can be used in case private networks (RFC 1918 and RFC 6598) + are not available. + enum: + - default + - podIP + type: string type: object type: object images: diff --git a/config/crd/bases/tempo.grafana.com_tempostacks.yaml b/config/crd/bases/tempo.grafana.com_tempostacks.yaml index 9b156e754..da0c38c5e 100644 --- a/config/crd/bases/tempo.grafana.com_tempostacks.yaml +++ b/config/crd/bases/tempo.grafana.com_tempostacks.yaml @@ -75,6 +75,16 @@ spec: description: EnableIPv6 enables IPv6 support for the memberlist based hash ring. type: boolean + instanceAddrType: + description: |- + InstanceAddrType defines the type of address to use to advertise to the ring. + Defaults to the first address from any private network interfaces of the current pod. + Alternatively the public pod IP can be used in case private networks (RFC 1918 and RFC 6598) + are not available. 
+ enum: + - default + - podIP + type: string type: object type: object images: diff --git a/config/manifests/community/bases/tempo-operator.clusterserviceversion.yaml b/config/manifests/community/bases/tempo-operator.clusterserviceversion.yaml index 183b5d776..6500bcdf8 100644 --- a/config/manifests/community/bases/tempo-operator.clusterserviceversion.yaml +++ b/config/manifests/community/bases/tempo-operator.clusterserviceversion.yaml @@ -513,6 +513,15 @@ spec: path: hashRing.memberlist.enableIPv6 x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: InstanceAddrType defines the type of address to use to advertise + to the ring. Defaults to the first address from any private network interfaces + of the current pod. Alternatively the public pod IP can be used in case + private networks (RFC 1918 and RFC 6598) are not available. + displayName: Instance Address + path: hashRing.memberlist.instanceAddrType + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:default + - urn:alm:descriptor:com.tectonic.ui:select:podIP - description: Images defines the image for each container. displayName: Container Images path: images diff --git a/config/manifests/openshift/bases/tempo-operator.clusterserviceversion.yaml b/config/manifests/openshift/bases/tempo-operator.clusterserviceversion.yaml index 55a0cd54a..4f3f7f264 100644 --- a/config/manifests/openshift/bases/tempo-operator.clusterserviceversion.yaml +++ b/config/manifests/openshift/bases/tempo-operator.clusterserviceversion.yaml @@ -513,6 +513,15 @@ spec: path: hashRing.memberlist.enableIPv6 x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch + - description: InstanceAddrType defines the type of address to use to advertise + to the ring. Defaults to the first address from any private network interfaces + of the current pod. Alternatively the public pod IP can be used in case + private networks (RFC 1918 and RFC 6598) are not available. 
+ displayName: Instance Address + path: hashRing.memberlist.instanceAddrType + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:select:default + - urn:alm:descriptor:com.tectonic.ui:select:podIP - description: Images defines the image for each container. displayName: Container Images path: images diff --git a/internal/manifests/config/build.go b/internal/manifests/config/build.go index 9b17bc705..96a4bc205 100644 --- a/internal/manifests/config/build.go +++ b/internal/manifests/config/build.go @@ -4,6 +4,7 @@ import ( "bytes" "embed" "fmt" + "github.com/grafana/tempo-operator/internal/manifests/memberlist" "html/template" "io" "path" @@ -73,8 +74,9 @@ func buildConfiguration(params manifestutils.Params) ([]byte, error) { StorageParams: params.StorageParams, GlobalRetention: tempo.Spec.Retention.Global.Traces.Duration.String(), MemberList: memberlistOptions{ - JoinMembers: []string{naming.Name("gossip-ring", tempo.Name)}, - EnableIPv6: ptr.Deref(tempo.Spec.HashRing.MemberList.EnableIPv6, false), + JoinMembers: []string{naming.Name("gossip-ring", tempo.Name)}, + EnableIPv6: ptr.Deref(tempo.Spec.HashRing.MemberList.EnableIPv6, false), + InstanceAddr: gossipRingInstanceAddr(tempo.Spec.HashRing), }, QueryFrontendDiscovery: fmt.Sprintf("%s:%d", naming.Name("query-frontend-discovery", tempo.Name), manifestutils.PortGRPCServer), GlobalRateLimits: fromRateLimitSpecToRateLimitOptions(tempo.Spec.LimitSpec.Global), @@ -256,3 +258,17 @@ func renderTempoQueryTemplate(opts tempoQueryOptions) ([]byte, error) { return cfg, nil } + +func gossipRingInstanceAddr(spec v1alpha1.HashRingSpec) string { + var instanceAddr string + switch spec.MemberList.InstanceAddrType { + case v1alpha1.InstanceAddrPodIP: + instanceAddr = fmt.Sprintf("${%s}", memberlist.GossipInstanceAddrEnvVarName) + case v1alpha1.InstanceAddrDefault: + // Do nothing, use Tempo defaults + default: + // Do nothing, use Tempo defaults + } + + return instanceAddr +} diff --git a/internal/manifests/config/options.go 
b/internal/manifests/config/options.go index d34519764..b39a27074 100644 --- a/internal/manifests/config/options.go +++ b/internal/manifests/config/options.go @@ -74,8 +74,9 @@ type tlsOptions struct { } type memberlistOptions struct { - JoinMembers []string - EnableIPv6 bool + JoinMembers []string + EnableIPv6 bool + InstanceAddr string } type receiverTLSOptions struct { diff --git a/internal/manifests/config/tempo-config.yaml b/internal/manifests/config/tempo-config.yaml index cee5abb49..6b733628c 100644 --- a/internal/manifests/config/tempo-config.yaml +++ b/internal/manifests/config/tempo-config.yaml @@ -90,11 +90,17 @@ distributor: ring: kvstore: store: memberlist + {{ - with .MemberList.InstanceAddr }} + instance_addr: {{ . }} + {{ - end }} ingester: lifecycler: ring: kvstore: store: memberlist + {{ - with .MemberList.InstanceAddr }} + instance_addr: {{ . }} + {{ - end }} replication_factor: {{ .ReplicationFactor }} tokens_file_path: /var/tempo/tokens.json {{- if .MemberList.EnableIPv6 }} diff --git a/internal/manifests/distributor/distributor.go b/internal/manifests/distributor/distributor.go index 20ed1a101..a0d66294b 100644 --- a/internal/manifests/distributor/distributor.go +++ b/internal/manifests/distributor/distributor.go @@ -245,6 +245,7 @@ func deployment(params manifestutils.Params) *v1.Deployment { "-target=distributor", "-config.file=/conf/tempo.yaml", "-log.level=info", + "-config.expand-env=true", }, Ports: containerPorts, ReadinessProbe: manifestutils.TempoReadinessProbe(params.CtrlConfig.Gates.HTTPEncryption), diff --git a/internal/manifests/ingester/ingester.go b/internal/manifests/ingester/ingester.go index 51f9d0325..aa0b27249 100644 --- a/internal/manifests/ingester/ingester.go +++ b/internal/manifests/ingester/ingester.go @@ -94,6 +94,7 @@ func statefulSet(params manifestutils.Params) (*v1.StatefulSet, error) { "-target=ingester", "-config.file=/conf/tempo.yaml", "-log.level=info", + "-config.expand-env=true", }, VolumeMounts: 
[]corev1.VolumeMount{ { diff --git a/internal/manifests/memberlist/gossip.go b/internal/manifests/memberlist/gossip.go index f898359b3..c5c78f64a 100644 --- a/internal/manifests/memberlist/gossip.go +++ b/internal/manifests/memberlist/gossip.go @@ -1,10 +1,12 @@ package memberlist import ( + "github.com/imdario/mergo" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8slabels "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" + "strings" "github.com/grafana/tempo-operator/apis/tempo/v1alpha1" "github.com/grafana/tempo-operator/internal/manifests/manifestutils" @@ -12,7 +14,8 @@ import ( ) const ( - componentName = "gossip-ring" + componentName = "gossip-ring" + GossipInstanceAddrEnvVarName = "HASH_RING_INSTANCE_ADDR" ) var ( @@ -46,3 +49,55 @@ func BuildGossip(tempo v1alpha1.TempoStack) *corev1.Service { }, } } + +func resetEnvVar(podSpec *corev1.PodSpec, name string) { + for i, container := range podSpec.Containers { + found, index := findEnvVar(name, container.Env) + if found { + podSpec.Containers[i].Env = append(podSpec.Containers[i].Env[:index], podSpec.Containers[i].Env[index+1:]...) 
+ } + } +} + +func findEnvVar(name string, envVars []corev1.EnvVar) (bool, int) { + for i, env := range envVars { + if env.Name == name || env.Name == strings.ToLower(name) { + return true, i + } + } + return false, 0 +} + +func configureHashRingEnv(p *corev1.PodSpec, tempo v1alpha1.TempoStack) error { + resetEnvVar(p, GossipInstanceAddrEnvVarName) + + memberList := tempo.Spec.HashRing.MemberList + enableIPV6 := memberList.EnableIPv6 != nil && *memberList.EnableIPv6 + + if !enableIPV6 && memberList.InstanceAddrType != v1alpha1.InstanceAddrPodIP { + return nil + } + + src := corev1.Container{ + Env: []corev1.EnvVar{ + { + Name: GossipInstanceAddrEnvVarName, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIP", + }, + }, + }, + }, + } + + for i, dst := range p.Containers { + if err := mergo.Merge(&dst, src, mergo.WithAppendSlice); err != nil { + return err + } + p.Containers[i] = dst + } + + return nil +}