[YUNIKORN-1126] Add e2e tests for user and group limits with wildcard (apache#909)

Closes: apache#909

Signed-off-by: Craig Condit <[email protected]>
rrajesh-cloudera authored and craigcondit committed Sep 6, 2024
1 parent 58c0c36 commit b335500
Showing 3 changed files with 246 additions and 0 deletions.
1 change: 1 addition & 0 deletions test/e2e/framework/configmanager/constants.go
@@ -46,6 +46,7 @@ const (
	NodesPath         = "ws/v1/partition/%s/nodes"
	UserUsagePath     = "ws/v1/partition/%s/usage/user/%s"
	GroupUsagePath    = "ws/v1/partition/%s/usage/group/%s"
	GroupsUsagePath   = "ws/v1/partition/%s/usage/groups"
	HealthCheckPath   = "ws/v1/scheduler/healthcheck"
	ValidateConfPath  = "ws/v1/validate-conf"
	FullStateDumpPath = "ws/v1/fullstatedump"
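For orientation (this sketch is not part of the diff): the new constant is a format string whose single %s placeholder is the partition name, so callers are expected to expand it before building the request. A minimal, self-contained illustration:

// Illustrative only: mirrors configmanager.GroupsUsagePath and shows how the
// %s placeholder is filled with a partition name.
package main

import "fmt"

const groupsUsagePath = "ws/v1/partition/%s/usage/groups"

func main() {
	fmt.Println(fmt.Sprintf(groupsUsagePath, "default")) // ws/v1/partition/default/usage/groups
}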
10 changes: 10 additions & 0 deletions test/e2e/framework/helpers/yunikorn/rest_api_utils.go
@@ -536,3 +536,13 @@ func (c *RClient) GetGroupUsage(partition string, groupName string) (*dao.GroupResourceUsageDAOInfo, error) {
	_, err = c.do(req, &groupUsage)
	return groupUsage, err
}

func (c *RClient) GetGroupsUsage(partition string) ([]*dao.GroupResourceUsageDAOInfo, error) {
	req, err := c.newRequest("GET", fmt.Sprintf(configmanager.GroupsUsagePath, partition), nil)
	if err != nil {
		return nil, err
	}
	var groupsUsage []*dao.GroupResourceUsageDAOInfo
	_, err = c.do(req, &groupsUsage)
	return groupsUsage, err
}
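For context, a rough usage sketch of the new helper (not part of the commit): it assumes the restClient instance, constants.DefaultPartition, and the gomega bindings already used by this e2e suite, and it only touches the GroupName and Queues fields that the new assertions further down rely on.

// Illustrative only: fetch usage for all tracked groups in the default
// partition and pick out the wildcard ("*") entry, the same lookup the new
// checkUsageWildcardGroups helper performs below.
groupsUsage, err := restClient.GetGroupsUsage(constants.DefaultPartition)
Ω(err).NotTo(gomega.HaveOccurred())
for _, groupUsage := range groupsUsage {
	if groupUsage.GroupName == "*" {
		// Queues holds the per-queue resource usage tree for the wildcard group.
		Ω(groupUsage.Queues).NotTo(gomega.BeNil())
	}
}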
235 changes: 235 additions & 0 deletions test/e2e/user_group_limit/user_group_limit_test.go
@@ -708,6 +708,209 @@ var _ = ginkgo.Describe("UserGroupLimit", func() {
			return nil
		})
	})

ginkgo.It("Verify user limit and wildcard user limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil
err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})

// usergroup1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}

// usergroup1 can't deploy the second sleep pod to root.sandbox1
usergroup1Sandbox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because memory usage is less than user entry limit")
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than user entry limit")
checkUsage(userTestType, user1, sandboxQueue1, []*v1.Pod{usergroup1Sandbox1Pod1})

// usergroup2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1})

// usergroup2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for usergroup2")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// usergroup2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
usergroup2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2})

// Update Wildcard user entry limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "user entry",
Users: []string{user1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard user entry",
Users: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
},
})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// usergroup2 can deploy the third sleep pod to root.sandbox1 becuase of max-application limit updated to 3
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})
// usergroup2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications")
checkUsage(userTestType, user2, sandboxQueue1, []*v1.Pod{usergroup2Sandbox1Pod1, usergroup2Sandbox1Pod2, usergroup2Sandbox1Pod3})

})

ginkgo.It("Verify group limit and wildcard group limit", func() {
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 2,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
}})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group1 can deploy the first sleep pod to root.sandbox1
usergroup1 := &si.UserGroupInformation{User: user1, Groups: []string{group1}}
group1Sandvox1Pod1 := deploySleepPod(usergroup1, sandboxQueue1, true, "because there is no limit for group1")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandvox1Pod1})

// group1 can't deploy the second sleep pod to root.sandbox1
usergroup1 = &si.UserGroupInformation{User: user1, Groups: []string{group1}}
_ = deploySleepPod(usergroup1, sandboxQueue1, false, "because final memory usage is more than group entry limit")
checkUsage(groupTestType, group1, sandboxQueue1, []*v1.Pod{group1Sandvox1Pod1})

// group2 can deploy 2 sleep pods to root.sandbox1
usergroup2 := &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod1 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1})

// group2 can deploy the second sleep pod to root.sandbox1
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod2 := deploySleepPod(usergroup2, sandboxQueue1, true, "because there is no limit for group2")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})

// group2 can't deploy the third sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
group2Sandbox1Pod3 := deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2})
// Update Wildcard group entry limit to 3
ginkgo.By("Update config")
// The wait wrapper still can't fully guarantee that the config in AdmissionController has been updated.
yunikorn.WaitForAdmissionControllerRefreshConfAfterAction(func() {
yunikorn.UpdateCustomConfigMapWrapperWithMap(oldConfigMap, "", admissionCustomConfig, func(sc *configs.SchedulerConfig) error {
// remove placement rules so we can control queue
sc.Partitions[0].PlacementRules = nil

err := common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{
Name: "sandbox1",
Limits: []configs.Limit{
{
Limit: "group entry",
Groups: []string{group1},
MaxApplications: 1,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", mediumMem),
},
},
{
Limit: "wildcard group entry",
Groups: []string{"*"},
MaxApplications: 3,
MaxResources: map[string]string{
siCommon.Memory: fmt.Sprintf("%dM", largeMem),
},
},
}})
if err != nil {
return err
}
return common.AddQueue(sc, constants.DefaultPartition, constants.RootQueue, configs.QueueConfig{Name: "sandbox2"})
})
})
// group2 can deploy the third sleep pod to root.sandbox1 becuase of max-application limit updated to 3
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
// group2 can't deploy the fourth sleep pod to root.sandbox1 because of max-application limit
usergroup2 = &si.UserGroupInformation{User: user2, Groups: []string{group2}}
_ = deploySleepPod(usergroup2, sandboxQueue1, false, "because final memory usage is more than wildcard maxapplications")
checkUsageWildcardGroups(groupTestType, group2, sandboxQueue1, []*v1.Pod{group2Sandbox1Pod1, group2Sandbox1Pod2, group2Sandbox1Pod3})
})

	ginkgo.AfterEach(func() {
		tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns.Name})

@@ -787,3 +990,35 @@ func checkUsage(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) {
	Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
	Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}

func checkUsageWildcardGroups(testType TestType, name string, queuePath string, expectedRunningPods []*v1.Pod) {
	var rootQueueResourceUsageDAO *dao.ResourceUsageDAOInfo
	if testType == groupTestType {
		ginkgo.By(fmt.Sprintf("Check group resource usage for %s in queue %s", name, queuePath))
		groupUsageDAOInfo, err := restClient.GetGroupsUsage(constants.DefaultPartition)
		Ω(err).NotTo(gomega.HaveOccurred())
		Ω(groupUsageDAOInfo).NotTo(gomega.BeNil())
		for _, groupUsageDAOInfog := range groupUsageDAOInfo {
			if groupUsageDAOInfog.GroupName == "*" {
				rootQueueResourceUsageDAO = groupUsageDAOInfog.Queues
			}
		}
	}
	Ω(rootQueueResourceUsageDAO).NotTo(gomega.BeNil())
	var resourceUsageDAO *dao.ResourceUsageDAOInfo
	for _, queue := range rootQueueResourceUsageDAO.Children {
		if queue.QueuePath == queuePath {
			resourceUsageDAO = queue
			break
		}
	}
	Ω(resourceUsageDAO).NotTo(gomega.BeNil())

	appIDs := make([]interface{}, 0, len(expectedRunningPods))
	for _, pod := range expectedRunningPods {
		appIDs = append(appIDs, pod.Labels[constants.LabelApplicationID])
	}
	Ω(resourceUsageDAO.ResourceUsage).NotTo(gomega.BeNil())
	Ω(resourceUsageDAO.ResourceUsage.Resources["pods"]).To(gomega.Equal(resources.Quantity(len(expectedRunningPods))))
	Ω(resourceUsageDAO.RunningApplications).To(gomega.ConsistOf(appIDs...))
}
