This repository has been archived by the owner on Dec 17, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_data.py
71 lines (55 loc) · 1.89 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import os
from collections import Counter
from datetime import datetime
from pathlib import Path
import fire
import yaml
from github import Github
from loguru import logger
def clean_labels(labels):
    """Return the label names with a leading ``"A: "`` prefix removed.

    Args:
        labels: Iterable of objects exposing a string ``name`` attribute.

    Returns:
        A list of names in the same order as ``labels``. Only a prefix is
        stripped: an ``"A: "`` occurring later in a name (for example in
        ``"QA: flaky"``) is left untouched, so unrelated labels are not
        mangled.
    """
    prefix = "A: "
    return [
        # Slice instead of str.replace so only the leading prefix is dropped.
        x.name[len(prefix):] if x.name.startswith(prefix) else x.name
        for x in labels
    ]
@logger.catch(reraise=True)
def get_data(output_folder):
    """Download labelled GitHub issue titles into one folder per label.

    Reads the ``data`` section of ``params.yaml`` (keys used: ``labels``,
    ``since``, ``until``, ``repo`` and ``metrics_file``) and queries the
    issues of ``repo``. Every issue that is not a pull request, was created
    no later than ``until`` and carries exactly one of the configured labels
    has its title written to ``<output_folder>/<label>/<number>.txt``. The
    number of issues written per label is dumped as JSON to ``metrics_file``.

    Args:
        output_folder: Directory under which the per-label sub-folders are
            created (created on demand).

    Raises:
        Exception: Anything raised while reading the config, querying GitHub
            or writing files propagates to the caller, because the
            ``logger.catch`` decorator is configured with ``reraise=True``.
    """
    with open("params.yaml", encoding="utf-8") as f:
        params = yaml.safe_load(f)["data"]

    output_folder = Path(output_folder)
    for label in params["labels"]:
        (output_folder / label).mkdir(parents=True, exist_ok=True)
    # Built once so the per-issue membership test does not rescan the list.
    wanted_labels = set(params["labels"])

    # The bounds are slash-separated integer fields handed positionally to
    # datetime(), which yields naive datetimes.
    since = datetime(*map(int, params["since"].split("/")))
    until = datetime(*map(int, params["until"].split("/")))
    logger.info(f"Getting issue labels since {since} until {until}")

    logger.info("Initializing Github")
    token = os.environ.get("GITHUB_TOKEN")
    # Fall back to an unauthenticated client when no (or an empty) token is set.
    g = Github(token) if token else Github()

    logger.info(f"Querying repo: {params['repo']}")
    repo = g.get_repo(params["repo"])

    metrics = Counter()
    for issue in repo.get_issues(since=since):
        issue_labels = [
            label
            for label in clean_labels(issue.labels)
            if label in wanted_labels
        ]

        # NOTE(review): depending on the PyGithub version, created_at may be
        # timezone-aware; comparing it with the naive `until` would raise
        # TypeError, so aware values are normalised to naive UTC first.
        created_at = issue.created_at
        offset = created_at.utcoffset()
        if offset is not None:
            created_at = (created_at - offset).replace(tzinfo=None)

        if (
            issue.pull_request
            or created_at > until
            or len(issue_labels) != 1
        ):
            logger.debug(f"Skipping issue: {issue.title}")
            logger.debug(f"Created at: {issue.created_at}")
            logger.debug(f"Labels: {issue.labels}")
            continue

        label = str(issue_labels[0])
        logger.info(f"\nTITLE: {issue.title}\nLABEL: {label}")

        output_file = output_folder / label / f"{issue.number}.txt"
        # Explicit UTF-8: titles may contain characters that the platform's
        # default encoding cannot represent.
        output_file.write_text(f"{issue.title}", encoding="utf-8")
        metrics[label] += 1

    metrics_file = Path(params["metrics_file"])
    # Make sure the destination directory exists before writing the metrics.
    metrics_file.parent.mkdir(parents=True, exist_ok=True)
    metrics_file.write_text(json.dumps(metrics, indent=4), encoding="utf-8")
if __name__ == "__main__":
    # Script entry point: hand get_data to python-fire, which builds the
    # command-line interface from the function.
    fire.Fire(get_data)