Skip to content

Commit

Permalink
Merge pull request #677 from ArangoGutierrez/enhance_e2e_logging
Browse files (browse the repository at this point in the history)
Enhance e2e logging
  • Loading branch information
elezar authored Apr 24, 2024
2 parents c727485 + 806a562 commit d5b7d4c
Show file tree
Hide file tree
Showing 13 changed files with 107 additions and 347 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ deployments/helm/gpu-feature-discovery
e2e_logs

*.out
*.log
4 changes: 3 additions & 1 deletion tests/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,7 @@ e2e-test:
-image.pull-policy=$(E2E_IMAGE_PULL_POLICY) \
-log-artifacts=$(LOG_ARTIFACTS) \
-helm-chart=$(HELM_CHART) \
-helm-log-file=$(LOG_ARTIFACTS)/helm.log \
-ginkgo.focus="\[nvidia\]" \
-test.timeout=1h
-test.timeout=1h \
-ginkgo.v
9 changes: 5 additions & 4 deletions tests/e2e/common/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,19 @@ func CleanupNode(ctx context.Context, cs clientset.Interface) {
}

if updateStatus {
By("Deleting NFD extended resources from node " + nodeName)
By("[Cleanup]\tDeleting NFD extended resources from node " + nodeName)
if _, err := cs.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
return err
}
}

if update {
By("Deleting NFD labels, annotations and taints from node " + node.Name)
By("[Cleanup]\tDeleting NFD labels, annotations and taints from node " + node.Name)
if _, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
return err
}
}

return nil
}

Expand Down Expand Up @@ -180,7 +181,7 @@ func cleanupNodeFeatures(ctx context.Context, cli *nfdclient.Clientset, namespac
Expect(err).NotTo(HaveOccurred())

if len(nfs.Items) != 0 {
By("Deleting NodeFeature objects from namespace " + namespace)
By("[Cleanup]\tDeleting NodeFeature objects from namespace " + namespace)
for _, nf := range nfs.Items {
err = cli.NfdV1alpha1().NodeFeatures(namespace).Delete(ctx, nf.Name, metav1.DeleteOptions{})
if errors.IsNotFound(err) {
Expand All @@ -202,7 +203,7 @@ func cleanupNodeFeatureRules(ctx context.Context, cli *nfdclient.Clientset) {
Expect(err).NotTo(HaveOccurred())

if len(nfrs.Items) != 0 {
By("Deleting NodeFeatureRule objects from the cluster")
By("[Cleanup]\tDeleting NodeFeatureRule objects from the cluster")
for _, nfr := range nfrs.Items {
err = cli.NfdV1alpha1().NodeFeatureRules().Delete(ctx, nfr.Name, metav1.DeleteOptions{})
if errors.IsNotFound(err) {
Expand Down
50 changes: 14 additions & 36 deletions tests/e2e/device-plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package e2e
import (
"context"
"fmt"
"os"
"strings"
"time"

Expand All @@ -36,7 +35,6 @@ import (
"github.com/NVIDIA/k8s-device-plugin/tests/e2e/common"
"github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics"
"github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework"
e2elog "github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework/logs"
)

// Actual test suite
Expand All @@ -54,10 +52,8 @@ var _ = NVDescribe("GPU Device Plugin", func() {
crds []*apiextensionsv1.CustomResourceDefinition
extClient *extclient.Clientset

helmClient helm.Client
chartSpec helm.ChartSpec
helmReleaseName string
kubeconfig []byte
chartSpec helm.ChartSpec

collectLogsFrom []string
diagnosticsCollector *diagnostics.Diagnostic
Expand All @@ -68,7 +64,7 @@ var _ = NVDescribe("GPU Device Plugin", func() {
"nodes",
"namespaces",
"deployments",
"demonsets",
"daemonsets",
"jobs",
}

Expand All @@ -91,25 +87,13 @@ var _ = NVDescribe("GPU Device Plugin", func() {
}

BeforeAll(func(ctx context.Context) {
var err error
// Create clients for apiextensions and our CRD api
extClient = extclient.NewForConfigOrDie(f.ClientConfig())
helmReleaseName = "nvdp-e2e-test" + rand.String(5)
kubeconfig, err = os.ReadFile(os.Getenv("KUBECONFIG"))
Expect(err).NotTo(HaveOccurred())
})

JustBeforeEach(func(ctx context.Context) {
// reset Helm Client
var err error
opt := &helm.KubeConfClientOptions{
Options: &helm.Options{
Namespace: f.Namespace.Name,
RepositoryCache: "/tmp/.helmcache",
RepositoryConfig: "/tmp/.helmrepo",
},
KubeConfig: kubeconfig,
}
chartSpec = helm.ChartSpec{
ReleaseName: helmReleaseName,
ChartName: *HelmChart,
Expand All @@ -119,13 +103,12 @@ var _ = NVDescribe("GPU Device Plugin", func() {
ValuesOptions: values,
CleanupOnFail: true,
}
helmClient, err = helm.NewClientFromKubeConf(opt)
Expect(err).NotTo(HaveOccurred())
_, err = helmClient.InstallChart(ctx, &chartSpec, nil)

By("Installing k8s-device-plugin Helm chart")
_, err := f.HelmClient.InstallChart(ctx, &chartSpec, nil)
Expect(err).NotTo(HaveOccurred())
})

// Cleanup before next test run
AfterEach(func(ctx context.Context) {
// Run diagnostic collector if test failed
if CurrentSpecReport().Failed() {
Expand All @@ -136,16 +119,14 @@ var _ = NVDescribe("GPU Device Plugin", func() {
diagnostics.WithKubernetesClient(f.ClientSet),
diagnostics.WithObjects(collectLogsFrom...),
)
if err != nil {
e2elog.Logf("Failed to create diagnostic collector: %v", err)
} else {
if err = diagnosticsCollector.Collect(ctx); err != nil {
e2elog.Logf("Diagnostic collector failed: %v", err)
}
}
Expect(err).NotTo(HaveOccurred())

err = diagnosticsCollector.Collect(ctx)
Expect(err).NotTo(HaveOccurred())
}
// Cleanup before next test run
// Delete Helm release
err := helmClient.UninstallReleaseByName(helmReleaseName)
err := f.HelmClient.UninstallReleaseByName(helmReleaseName)
Expect(err).NotTo(HaveOccurred())
})

Expand All @@ -158,7 +139,6 @@ var _ = NVDescribe("GPU Device Plugin", func() {

Context("and NV Driver is installed", func() {
It("it should create nvidia.com/gpu resource", func(ctx context.Context) {
By("Getting node objects")
nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
Expect(len(nodeList.Items)).ToNot(BeZero())
Expand All @@ -170,23 +150,21 @@ var _ = NVDescribe("GPU Device Plugin", func() {
targetNodeName := nodes[0].Name
Expect(targetNodeName).ToNot(BeEmpty(), "No suitable worker node found")

By("Check node capacity")
By("Checking the node capacity")
capacityChecker := map[string]k8sLabels{
targetNodeName: {
"nvidia.com/gpu": "^[1-9]$",
}}
e2elog.Logf("verifying capacity of node %q...", targetNodeName)
eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchCapacity(capacityChecker, nodes))
eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchCapacity(capacityChecker, nodes), "Node capacity does not match")
})
It("it should run GPU jobs", func(ctx context.Context) {
By("Creating GPU job")
By("Creating a GPU job")
job := common.GPUJob.DeepCopy()
job.Namespace = f.Namespace.Name
_, err := f.ClientSet.BatchV1().Jobs(f.Namespace.Name).Create(ctx, job, metav1.CreateOptions{})
Expect(err).NotTo(HaveOccurred())

By("Waiting for job to complete")

Eventually(func() error {
job, err := f.ClientSet.BatchV1().Jobs(f.Namespace.Name).Get(ctx, job.Name, metav1.GetOptions{})
if err != nil {
Expand Down
13 changes: 2 additions & 11 deletions tests/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,14 @@ package e2e

import (
"flag"
"log"
"os"
"testing"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/klog/v2"

"github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework"
e2elog "github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework/logs"
)

var (
Expand All @@ -44,28 +42,21 @@ func TestMain(m *testing.M) {
// Register test flags, then parse flags.
framework.RegisterClusterFlags(flag.CommandLine)
flag.Parse()
klog.SetOutput(ginkgo.GinkgoWriter)

// check if flags are set and if not cancel the test run
if *ImageRepo == "" || *ImageTag == "" || *HelmChart == "" {
e2elog.Failf("Required flags not set. Please set -image.repo, -image.tag and -helm-chart")
log.Fatal("Required flags not set. Please set -image.repo, -image.tag and -helm-chart")
}

os.Exit(m.Run())
}

func TestE2E(t *testing.T) {
e2elog.InitLogs()
defer e2elog.FlushLogs()
klog.EnableContextualLogging(true)
gomega.RegisterFailHandler(ginkgo.Fail)
// Run tests through the Ginkgo runner with output to console + JUnit for Jenkins
suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration()
// Randomize specs as well as suites
suiteConfig.RandomizeAllSpecs = true

var runID = uuid.NewUUID()

klog.Infof("Starting e2e run %q on Ginkgo node %d", runID, suiteConfig.ParallelProcess)
ginkgo.RunSpecs(t, "nvidia k8s-device-plugin e2e suite", suiteConfig, reporterConfig)
}
4 changes: 3 additions & 1 deletion tests/e2e/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

package e2e

import "github.com/onsi/ginkgo/v2"
import (
"github.com/onsi/ginkgo/v2"
)

// NVDescribe annotates the test with the NVIDIA label.
func NVDescribe(text string, body func()) bool {
Expand Down
103 changes: 0 additions & 103 deletions tests/e2e/framework/expect.go

This file was deleted.

Loading

0 comments on commit d5b7d4c

Please sign in to comment.