Skip to content

Commit

Permalink
test: Windows HNS e2e and no crashes step (#789)
Browse files Browse the repository at this point in the history
# Description

Please provide a brief description of the changes made in this pull
request.

## Related Issue

If this pull request is related to any issue, please mention it here.
Additionally, make sure that the issue is assigned to you before
submitting this pull request.

## Checklist

- [ ] I have read the [contributing
documentation](https://retina.sh/docs/contributing).
- [ ] I signed and signed-off the commits (`git commit -S -s ...`). See
[this
documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification)
on signing commits.
- [ ] I have correctly attributed the author(s) of the code.
- [ ] I have tested the changes locally.
- [ ] I have followed the project's style guidelines.
- [ ] I have updated the documentation, if necessary.
- [ ] I have added tests, if applicable.

## Screenshots (if applicable) or Testing Completed

Please add any relevant screenshots or GIFs to showcase the changes
made.

## Additional Notes

Add any additional notes or context about the pull request here.

---

Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more
information on how to contribute to this project.
  • Loading branch information
matmerr authored Oct 1, 2024
1 parent 1f19ed0 commit 3b7787b
Show file tree
Hide file tree
Showing 15 changed files with 400 additions and 63 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
*.so
*.dylib

# Avoid checking in keys
*.pem

# Test binary, built with `go test -c`
*.test

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: status.hostIP
livenessProbe:
httpGet:
path: /metrics
port: {{ .Values.retinaPort }}
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }}
periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }}
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }}
failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }}
successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
securityContext:
capabilities:
add:
Expand Down
16 changes: 16 additions & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,19 @@ For reference, see the `test-all` recipe in the root [Makefile](../../Makefile).

For sample test, please check out:
[the Retina E2E.](./scenarios/retina/drop/scenario.go)

## Sample VSCode `settings.json` for running with existing cluster

```json
"go.testFlags": [
"-v",
"-timeout=40m",
"-tags=e2e",
"-args",
"-create-infra=false",
"-delete-infra=false",
"-image-namespace=yourregistrynamespace",
"-image-registry=yourregistry",
"-image-tag=yourtesttag",
],
```
3 changes: 3 additions & 0 deletions test/e2e/framework/kubernetes/create-kapinger-deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
},

Spec: v1.PodSpec{
NodeSelector: map[string]string{
"kubernetes.io/os": "linux",
},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
// prefer an even spread across the cluster to avoid scheduling on the same node
Expand Down
40 changes: 22 additions & 18 deletions test/e2e/framework/kubernetes/exec-pod.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package kubernetes

import (
"bytes"
"context"
"fmt"
"log"
Expand All @@ -9,6 +10,7 @@ import (

v1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/tools/remotecommand"
"k8s.io/kubectl/pkg/scheme"
Expand All @@ -27,7 +29,17 @@ func (e *ExecInPod) Run() error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

err := ExecPod(ctx, e.KubeConfigFilePath, e.PodNamespace, e.PodName, e.Command)
config, err := clientcmd.BuildConfigFromFlags("", e.KubeConfigFilePath)
if err != nil {
return fmt.Errorf("error building kubeconfig: %w", err)
}

clientset, err := kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

_, err = ExecPod(ctx, clientset, config, e.PodNamespace, e.PodName, e.Command)
if err != nil {
return fmt.Errorf("error executing command [%s]: %w", e.Command, err)
}
Expand All @@ -43,17 +55,8 @@ func (e *ExecInPod) Stop() error {
return nil
}

func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, command string) error {
config, err := clientcmd.BuildConfigFromFlags("", kubeConfigFilePath)
if err != nil {
return fmt.Errorf("error building kubeconfig: %w", err)
}

clientset, err := kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) ([]byte, error) {
log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
req := clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
Namespace(namespace).SubResource(ExecSubResources)
option := &v1.PodExecOptions{
Expand All @@ -69,20 +72,21 @@ func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, comman
scheme.ParameterCodec,
)

var buf bytes.Buffer
exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
if err != nil {
return fmt.Errorf("error creating executor: %w", err)
return buf.Bytes(), fmt.Errorf("error creating executor: %w", err)
}

log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
Stdout: &buf,
Stderr: &buf,
})
if err != nil {
return fmt.Errorf("error executing command: %w", err)
return buf.Bytes(), fmt.Errorf("error executing command: %w", err)
}

return nil
res := buf.Bytes()
return res, nil
}
20 changes: 17 additions & 3 deletions test/e2e/framework/kubernetes/get-logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,16 @@ import (
"k8s.io/client-go/tools/clientcmd"
)

func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
// GetPodLogs describes a step that prints the logs of the pods selected by
// LabelSelector inside Namespace.
type GetPodLogs struct {
	// KubeConfigFilePath is the kubeconfig file used to build the cluster client.
	KubeConfigFilePath string
	// Namespace is the namespace in which pods are listed.
	Namespace string
	// LabelSelector restricts which pods have their logs printed.
	LabelSelector string
}

func (p *GetPodLogs) Run() error {
fmt.Printf("printing pod logs for namespace: %s, labelselector: %s\n", p.Namespace, p.LabelSelector)
// Load the kubeconfig file to get the configuration to access the cluster
config, err := clientcmd.BuildConfigFromFlags("", kubeconfigpath)
config, err := clientcmd.BuildConfigFromFlags("", p.KubeConfigFilePath)
if err != nil {
log.Printf("error building kubeconfig: %s\n", err)
}
Expand All @@ -25,8 +32,14 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
log.Printf("error creating clientset: %s\n", err)
}

PrintPodLogs(context.Background(), clientset, p.Namespace, p.LabelSelector)

return nil
}

func PrintPodLogs(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) {
// List all the pods in the namespace
pods, err := clientset.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
Expand Down Expand Up @@ -55,5 +68,6 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {

// Print the logs
log.Println(string(buf))
fmt.Printf("#######################################################\n")
}
}
26 changes: 24 additions & 2 deletions test/e2e/framework/kubernetes/install-retina-helm.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package kubernetes

import (
"context"
"fmt"
"log"
"os"
Expand All @@ -11,10 +12,12 @@ import (
"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart/loader"
"helm.sh/helm/v3/pkg/cli"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

const (
createTimeout = 240 * time.Second // windpws is slow
createTimeout = 20 * time.Minute // windows is slow
deleteTimeout = 60 * time.Second
)

Expand All @@ -32,6 +35,8 @@ type InstallHelmChart struct {
}

func (i *InstallHelmChart) Run() error {
ctx, cancel := context.WithTimeout(context.Background(), createTimeout)
defer cancel()
settings := cli.New()
settings.KubeConfig = i.KubeConfigFilePath
actionConfig := new(action.Configuration)
Expand Down Expand Up @@ -97,7 +102,7 @@ func (i *InstallHelmChart) Run() error {
client.WaitForJobs = true

// install the chart here
rel, err := client.Run(chart, chart.Values)
rel, err := client.RunWithContext(ctx, chart, chart.Values)
if err != nil {
return fmt.Errorf("failed to install chart: %w", err)
}
Expand All @@ -106,6 +111,23 @@ func (i *InstallHelmChart) Run() error {
// this will confirm the values set during installation
log.Printf("chart values: %v\n", rel.Config)

// ensure all pods are running, since helm doesn't care about windows
config, err := clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath)
if err != nil {
return fmt.Errorf("error building kubeconfig: %w", err)
}

clientset, err := kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

labelSelector := "k8s-app=retina"
err = WaitForPodReady(ctx, clientset, "kube-system", labelSelector)
if err != nil {
return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
}

return nil
}

Expand Down
43 changes: 43 additions & 0 deletions test/e2e/framework/kubernetes/no-crashes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package kubernetes

import (
"context"
"fmt"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

// ErrPodCrashed is the sentinel error wrapped by WaitForPodReady when a
// container in a watched pod reports a restart count greater than zero.
var ErrPodCrashed = fmt.Errorf("pod has crashes")

// EnsureStableCluster describes a step that waits for the pods selected by
// LabelSelector in PodNamespace to be ready.
type EnsureStableCluster struct {
	// LabelSelector selects the pods to check.
	LabelSelector string
	// PodNamespace is the namespace in which the pods are looked up.
	PodNamespace string
	// KubeConfigFilePath is the kubeconfig file used to build the cluster client.
	KubeConfigFilePath string
}

// Run builds a Kubernetes client from KubeConfigFilePath and waits, via
// WaitForPodReady, for the pods matching LabelSelector in PodNamespace.
//
// The wait is bounded by RetryTimeoutPodsReady. WaitForPodReady polls until
// its context is cancelled, so passing a context without a deadline would
// block this step indefinitely if a pod never reached the Running phase.
//
// It returns an error if the kubeconfig cannot be loaded, the client cannot be
// created, or WaitForPodReady fails (including when the deadline expires).
func (n *EnsureStableCluster) Run() error {
	config, err := clientcmd.BuildConfigFromFlags("", n.KubeConfigFilePath)
	if err != nil {
		return fmt.Errorf("error building kubeconfig: %w", err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("error creating Kubernetes client: %w", err)
	}

	// Give the poll loop a deadline so a permanently pending pod fails the step
	// instead of hanging it.
	ctx, cancel := context.WithTimeout(context.Background(), RetryTimeoutPodsReady)
	defer cancel()

	if err := WaitForPodReady(ctx, clientset, n.PodNamespace, n.LabelSelector); err != nil {
		return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
	}
	return nil
}

// Prevalidate performs no checks and always returns nil.
func (n *EnsureStableCluster) Prevalidate() error {
	return nil
}

// Stop has nothing to tear down and always returns nil.
func (n *EnsureStableCluster) Stop() error {
	return nil
}
18 changes: 14 additions & 4 deletions test/e2e/framework/kubernetes/port-forward.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

retry "github.com/microsoft/retina/test/retry"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
Expand Down Expand Up @@ -120,14 +121,23 @@ func (p *PortForward) Run() error {
}

func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kubernetes.Clientset) (string, error) {
targetPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
targetPodsAll, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
LabelSelector: p.LabelSelector,
FieldSelector: "status.phase=Running",
})
if errAffinity != nil {
return "", fmt.Errorf("could not list pods in %q with label %q: %w", p.Namespace, p.LabelSelector, errAffinity)
}

// omit windows pods because we can't port-forward to them
targetPodsLinux := make([]v1.Pod, 0)
for i := range targetPodsAll.Items {
if targetPodsAll.Items[i].Spec.NodeSelector["kubernetes.io/os"] != "windows" {
targetPodsLinux = append(targetPodsLinux, targetPodsAll.Items[i])
}
}

// get all pods with optional label affinity
affinityPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
LabelSelector: p.OptionalLabelAffinity,
FieldSelector: "status.phase=Running",
Expand All @@ -143,10 +153,10 @@ func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kuber
}

// if a pod is found on the same node as an affinity pod, use it
for i := range targetPods.Items {
if affinityNodes[targetPods.Items[i].Spec.NodeName] {
for i := range targetPodsLinux {
if affinityNodes[targetPodsLinux[i].Spec.NodeName] {
// found a pod with the specified label, on a node with the optional label affinity
return targetPods.Items[i].Name, nil
return targetPodsLinux[i].Name, nil
}
}

Expand Down
19 changes: 18 additions & 1 deletion test/e2e/framework/kubernetes/wait-pod-ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@ import (
const (
// RetryTimeoutPodsReady is a five-minute duration.
// NOTE(review): presumably the upper bound for waiting on pods; its use is not visible in this hunk — confirm against callers.
RetryTimeoutPodsReady = 5 * time.Minute
// RetryIntervalPodsReady is the interval WaitForPodReady passes to
// wait.PollUntilContextCancel between pod checks.
RetryIntervalPodsReady = 5 * time.Second

// printInterval throttles the "not in Running state yet" log line in
// WaitForPodReady to one message per printInterval poll iterations.
printInterval = 5 // print to stdout every 5 iterations
)

func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
podReadyMap := make(map[string]bool)

printIterator := 0
conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
defer func() {
printIterator++
}()
var podList *corev1.PodList
podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
if err != nil {
Expand All @@ -40,11 +46,21 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
return false, fmt.Errorf("error getting Pod: %w", err)
}

for istatus := range pod.Status.ContainerStatuses {
status := &pod.Status.ContainerStatuses[istatus]
if status.RestartCount > 0 {
return false, fmt.Errorf("pod %s has %d restarts: status: %+v: %w", pod.Name, status.RestartCount, status, ErrPodCrashed)
}
}

// Check the Pod phase
if pod.Status.Phase != corev1.PodRunning {
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
if printIterator%printInterval == 0 {
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
}
return false, nil
}

if !podReadyMap[pod.Name] {
log.Printf("pod \"%s\" is in Running state\n", pod.Name)
podReadyMap[pod.Name] = true
Expand All @@ -56,6 +72,7 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names

err := wait.PollUntilContextCancel(ctx, RetryIntervalPodsReady, true, conditionFunc)
if err != nil {
PrintPodLogs(ctx, clientset, namespace, labelSelector)
return fmt.Errorf("error waiting for pods in namespace \"%s\" with label \"%s\" to be in Running state: %w", namespace, labelSelector, err)
}
return nil
Expand Down
Loading

0 comments on commit 3b7787b

Please sign in to comment.