From 67fd05c520c8fa8d0cff0486ff4b0f89a5239986 Mon Sep 17 00:00:00 2001
From: Daniel Hiller
Date: Mon, 7 Oct 2024 15:12:28 +0200
Subject: [PATCH] devstats,repo_groups: generate repo_groups.sql

Signed-off-by: Daniel Hiller
---
 generators/cmd/devstats/main.go             | 169 ++++++++++++++++++++
 generators/cmd/devstats/repo_groups.gosql   |  85 ++++++++++
 generators/cmd/devstats/repo_groups_test.go | 114 +++++++++++++
 generators/cmd/devstats/types.go            |  30 ++++
 4 files changed, 398 insertions(+)
 create mode 100644 generators/cmd/devstats/main.go
 create mode 100644 generators/cmd/devstats/repo_groups.gosql
 create mode 100644 generators/cmd/devstats/repo_groups_test.go
 create mode 100644 generators/cmd/devstats/types.go

diff --git a/generators/cmd/devstats/main.go b/generators/cmd/devstats/main.go
new file mode 100644
index 00000000..78c8b121
--- /dev/null
+++ b/generators/cmd/devstats/main.go
@@ -0,0 +1,169 @@
+/*
+ * This file is part of the KubeVirt project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright the KubeVirt Authors.
+ *
+ */
+
+// Command devstats renders the devstats repo_groups.sql file from the SIG
+// definitions in sigs.yaml.
+package main
+
+import (
+	"bytes"
+	_ "embed"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"regexp"
+	"sort"
+	"strings"
+	"text/template"
+
+	"kubevirt.io/community/pkg/sigs"
+)
+
+// options holds the command line settings of the generator.
+type options struct {
+	// sigsYAMLPath is the sigs.yaml file the SIG definitions are read from.
+	sigsYAMLPath string
+	// outputPath is the file the generated SQL is written to.
+	outputPath string
+}
+
+// Validate reports an error when a required path is empty or when the
+// sigs.yaml file cannot be accessed.
+func (o *options) Validate() error {
+	if o.sigsYAMLPath == "" {
+		return fmt.Errorf("path to sigs.yaml is required")
+	}
+	if o.outputPath == "" {
+		return fmt.Errorf("path to the output file is required")
+	}
+	if _, err := os.Stat(o.sigsYAMLPath); err != nil {
+		if os.IsNotExist(err) {
+			return fmt.Errorf("file %s does not exist", o.sigsYAMLPath)
+		}
+		// Permission and other I/O problems must not pass validation silently.
+		return fmt.Errorf("checking file %s: %w", o.sigsYAMLPath, err)
+	}
+	return nil
+}
+
+// gatherOptions parses the command line flags into an options value.
+func gatherOptions() options {
+	o := options{}
+	fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+	fs.StringVar(&o.sigsYAMLPath, "sigs-yaml-path", "./sigs.yaml", "path to file sigs.yaml")
+	fs.StringVar(&o.outputPath, "output-path", "/tmp/repo_groups.sql", "path to file to write the output into")
+	err := fs.Parse(os.Args[1:])
+	if err != nil {
+		log.Fatalf("error parsing arguments %v: %v", os.Args[1:], err)
+	}
+	return o
+}
+
+// repoNameMatcher captures the "org/repo" part of a raw.githubusercontent.com
+// URL as used by the owner references in sigs.yaml.
+var repoNameMatcher = regexp.MustCompile(`^https://raw\.githubusercontent\.com/([^/]+/[^/]+)/.*$`)
+
+// repoNameFromOwnerRef returns the lower-cased "org/repo" name an owner
+// reference points into. The generated SQL compares the names against
+// lower(name), so a mixed-case name would never match.
+func repoNameFromOwnerRef(ownerRef string) (string, error) {
+	stringSubmatch := repoNameMatcher.FindStringSubmatch(ownerRef)
+	if stringSubmatch == nil {
+		return "", fmt.Errorf("ownerRef %q doesn't match %s", ownerRef, repoNameMatcher)
+	}
+	return strings.ToLower(stringSubmatch[1]), nil
+}
+
+// main reads sigs.yaml, builds one repo group per SIG that owns at least one
+// repository and writes the rendered SQL to the output path.
+func main() {
+	opts := gatherOptions()
+	if err := opts.Validate(); err != nil {
+		log.Fatalf("invalid arguments: %v", err)
+	}
+
+	sigsYAML, err := sigs.ReadFile(opts.sigsYAMLPath)
+	if err != nil {
+		log.Fatalf("failed to read sigs.yaml: %v", err)
+	}
+
+	var d RepoGroupsTemplateData
+	for _, sig := range sigsYAML.Sigs {
+		// A set, since several owner references may point into the same repo.
+		repoMap := make(map[string]struct{})
+		for _, subProject := range sig.SubProjects {
+			for _, ownerRef := range subProject.Owners {
+				repoName, err := repoNameFromOwnerRef(ownerRef)
+				if err != nil {
+					log.Fatal(err)
+				}
+				repoMap[repoName] = struct{}{}
+			}
+		}
+		// A SIG without any repository gets no repo group.
+		if len(repoMap) == 0 {
+			continue
+		}
+		repos := make([]string, 0, len(repoMap))
+		for repo := range repoMap {
+			repos = append(repos, repo)
+		}
+		// Map iteration order is random; sort to keep the output reproducible.
+		sort.Strings(repos)
+		d.RepoGroups = append(d.RepoGroups, RepoGroup{
+			Name:  sig.Name,
+			Alias: sig.Dir,
+			Repos: repos,
+		})
+	}
+
+	sql, err := generateRepoGroupsSQL(d)
+	if err != nil {
+		log.Fatalf("failed to generate sql: %v", err)
+	}
+
+	// os.WriteFile creates or truncates the file and, unlike a deferred
+	// Close that log.Fatal would skip, reports errors raised on close.
+	if err := os.WriteFile(opts.outputPath, []byte(sql), 0666); err != nil {
+		log.Fatalf("failed to write to file %q, %v", opts.outputPath, err)
+	}
+
+	log.Printf("output written to %q", opts.outputPath)
+}
+
+// repoGroupsSQLTemplate is the text/template source of repo_groups.sql.
+//
+//go:embed repo_groups.gosql
+var repoGroupsSQLTemplate string
+
+// generateRepoGroupsSQL renders the embedded template with d and returns the
+// resulting SQL, or the template parse or execution error.
+func generateRepoGroupsSQL(d RepoGroupsTemplateData) (string, error) {
+	templateInstance, err := template.New("repo_groups").Parse(repoGroupsSQLTemplate)
+	if err != nil {
+		return "", err
+	}
+	var output bytes.Buffer
+	err = templateInstance.Execute(&output, d)
+	if err != nil {
+		return "", err
+	}
+	return output.String(), nil
+}
diff --git a/generators/cmd/devstats/repo_groups.gosql b/generators/cmd/devstats/repo_groups.gosql
new file mode 100644
index 00000000..62551f93
--- /dev/null
+++ b/generators/cmd/devstats/repo_groups.gosql
@@ -0,0 +1,85 @@
+{{- /*
+    This file is part of the KubeVirt project
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+    Copyright the KubeVirt Authors.
+
+ */
+-}}
+{{- /* gotype: kubevirt.io/community/generators/cmd/devstats.RepoGroupsTemplateData */ -}}
+-- Add repository groups
+with repo_latest as (
+  select sub.repo_id,
+    sub.repo_name
+  from (
+    select repo_id,
+      dup_repo_name as repo_name,
+      row_number() over (partition by repo_id order by created_at desc, id desc) as row_num
+    from
+      gha_events
+  ) sub
+  where
+    sub.row_num = 1
+)
+update
+  gha_repos r
+set
+  alias = (
+    select rl.repo_name
+    from
+      repo_latest rl
+    where
+      rl.repo_id = r.id
+  )
+where
+  r.name like '%_/_%'
+  and r.name not like '%/%/%'
+;
+update gha_repos set repo_group = alias;
+
+insert into gha_repo_groups(id, name, alias, repo_group, org_id, org_login) select id, name, alias, coalesce(repo_group, name), org_id, org_login from gha_repos on conflict do nothing;
+insert into gha_repo_groups(id, name, alias, repo_group, org_id, org_login) select id, name, alias, org_login, org_id, org_login from gha_repos where org_id is not null and org_login is not null and trim(org_login) != '' on conflict do nothing;
+
+
+-- For each SIG that has claimed ownership via one of its subprojects we add a new entry in gha_repo_groups; conflicting rows are skipped
+{{ range $repoGroup := $.RepoGroups }}
+insert into gha_repo_groups(id, name, repo_group, alias, org_id, org_login)
+select id, name, '{{ $repoGroup.Name }}', alias, org_id, org_login
+  from gha_repo_groups
+  where lower(name) in ({{ range $i, $repo := $repoGroup.Repos }}{{ if $i }},{{ end }}
+    '{{ $repo }}'{{ end }}
+  )
+  on conflict do nothing;
+{{ end }}
+
+-- for the remaining rows where the repo_group has not been touched we set the default "Other"
+
+UPDATE gha_repos
+SET repo_group = 'Other'
+WHERE repo_group = alias;
+
+
+select
+  repo_group,
+  count(*) as number_of_repos
+from
+  gha_repo_groups
+where
+  repo_group is not null
+group by
+  repo_group
+order by
+  number_of_repos desc,
+  repo_group asc;
\ No newline at end of file
diff --git a/generators/cmd/devstats/repo_groups_test.go b/generators/cmd/devstats/repo_groups_test.go
new file mode 100644
index 00000000..ff0b0798
--- /dev/null
+++ b/generators/cmd/devstats/repo_groups_test.go
@@ -0,0 +1,114 @@
+/*
+ * This file is part of the KubeVirt project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright the KubeVirt Authors.
+ *
+ */
+
+package main
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestRepoGroupsTemplate checks that one well-formed insert statement is rendered per repo group.
+func TestRepoGroupsTemplate(t *testing.T) {
+	testCases := []struct {
+		name                    string
+		templateData            RepoGroupsTemplateData
+		expectedOutputContained string
+		expectedErr             error
+	}{
+		{
+			name: "one group",
+			templateData: RepoGroupsTemplateData{
+				RepoGroups: []RepoGroup{
+					{
+						Name:  "sig-testing",
+						Alias: "blah",
+						Repos: []string{
+							"kubevirt/ci",
+							"kubevirt/test",
+						},
+					},
+				},
+			},
+			expectedOutputContained: `insert into gha_repo_groups(id, name, repo_group, alias, org_id, org_login)
+select id, name, 'sig-testing', alias, org_id, org_login
+  from gha_repo_groups
+  where lower(name) in (
+    'kubevirt/ci',
+    'kubevirt/test'
+  )
+  on conflict do nothing;`,
+			expectedErr: nil,
+		},
+		{
+			name: "two groups",
+			templateData: RepoGroupsTemplateData{
+				RepoGroups: []RepoGroup{
+					{
+						Name:  "sig-testing",
+						Alias: "blah",
+						Repos: []string{
+							"kubevirt/kubevirt",
+							"kubevirt/test",
+						},
+					},
+					{
+						Name:  "sig-ci",
+						Alias: "bled",
+						Repos: []string{
+							"kubevirt/ci-health",
+							"kubevirt/kubevirtci",
+						},
+					},
+				},
+			},
+			expectedOutputContained: `
+insert into gha_repo_groups(id, name, repo_group, alias, org_id, org_login)
+select id, name, 'sig-testing', alias, org_id, org_login
+  from gha_repo_groups
+  where lower(name) in (
+    'kubevirt/kubevirt',
+    'kubevirt/test'
+  )
+  on conflict do nothing;
+
+insert into gha_repo_groups(id, name, repo_group, alias, org_id, org_login)
+select id, name, 'sig-ci', alias, org_id, org_login
+  from gha_repo_groups
+  where lower(name) in (
+    'kubevirt/ci-health',
+    'kubevirt/kubevirtci'
+  )
+  on conflict do nothing;`,
+			expectedErr: nil,
+		},
+	}
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			sql, err := generateRepoGroupsSQL(testCase.templateData)
+			if !strings.Contains(sql, testCase.expectedOutputContained) {
+				t.Log(sql)
+				t.Errorf("wanted output to contain:\n%s", testCase.expectedOutputContained)
+			}
+			if testCase.expectedErr != err {
+				t.Errorf("got %v, want %v", err, testCase.expectedErr)
+			}
+		})
+	}
+}
diff --git a/generators/cmd/devstats/types.go b/generators/cmd/devstats/types.go
new file mode 100644
index 00000000..c8977103
--- /dev/null
+++ b/generators/cmd/devstats/types.go
@@ -0,0 +1,30 @@
+/*
+ * This file is part of the KubeVirt project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright the KubeVirt Authors.
+ *
+ */
+
+package main
+
+type RepoGroup struct {
+	Name  string   // rendered as the repo_group value of the inserted rows
+	Alias string   // set from the SIG directory; not referenced by repo_groups.gosql
+	Repos []string // "org/repo" names matched against lower(name) in gha_repo_groups
+}
+
+type RepoGroupsTemplateData struct {
+	RepoGroups []RepoGroup // one insert statement is rendered per entry
+}