From a8e6cf22b05126839ee5773f86681ac673a9d37c Mon Sep 17 00:00:00 2001 From: SaveliyBorivets Date: Mon, 29 Jul 2024 01:28:31 +0300 Subject: [PATCH 1/4] iss53 forked commits exception --- commits_parser.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/commits_parser.py b/commits_parser.py index 23f41fb..8c9fc51 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -7,6 +7,8 @@ TIMEDELTA = 0.05 TIMEZONE = 'Europe/Moscow' FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch') +FORKED_REPO = False +ORIG_REPO_COMMITS = [] def log_commit_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: @@ -18,7 +20,7 @@ def log_commit_to_stdout(info): print(info) -def log_repository_commits(repository: Repository, csv_name, start, finish, branch): +def log_repository_commits(repository: Repository, csv_name, start, finish, branch, fork_flag): branches = [] match branch: case 'all': @@ -37,11 +39,13 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone( pytz.timezone(TIMEZONE)) > finish: continue - if commit.commit is not None: + if commit.commit is not None and commit.commit.sha not in ORIG_REPO_COMMITS: nvl = lambda val: val or EMPTY_FIELD commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email), commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch] info = dict(zip(FIELDNAMES, commit_data)) + if fork_flag: + ORIG_REPO_COMMITS.append(info['commit id']) log_commit_to_csv(info, csv_name) log_commit_to_stdout(info) @@ -57,11 +61,11 @@ def log_commits(client: Github, working_repos, csv_name, start, finish, branch, for repo in working_repos: try: print('=' * 20, repo.full_name, '=' * 20) - log_repository_commits(repo, csv_name, start, 
finish, branch) + log_repository_commits(repo, csv_name, start, finish, branch, fork_flag) if fork_flag: for forked_repo in repo.get_forks(): print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20) - log_repository_commits(forked_repo, csv_name, start, finish, branch) + log_repository_commits(forked_repo, csv_name, start, finish, branch, FORKED_REPO) sleep(TIMEDELTA) sleep(TIMEDELTA) except Exception as e: From fb860179c1abc420dd3ef9802ff5dca63cddd2b3 Mon Sep 17 00:00:00 2001 From: SaveliyBorivets Date: Mon, 29 Jul 2024 13:20:55 +0300 Subject: [PATCH 2/4] iss36 added and deleted lines update --- commits_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commits_parser.py b/commits_parser.py index 8c9fc51..e7abf34 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -6,7 +6,7 @@ EMPTY_FIELD = 'Empty field' TIMEDELTA = 0.05 TIMEZONE = 'Europe/Moscow' -FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch') +FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch', 'added lines', 'deleted lines') FORKED_REPO = False ORIG_REPO_COMMITS = [] @@ -42,7 +42,7 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran if commit.commit is not None and commit.commit.sha not in ORIG_REPO_COMMITS: nvl = lambda val: val or EMPTY_FIELD commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email), - commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch] + commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch, commit.stats.additions, commit.stats.deletions] info = dict(zip(FIELDNAMES, commit_data)) if fork_flag: ORIG_REPO_COMMITS.append(info['commit id']) From 66885f9be266c867447f7010b5bdf536c49f69f5 
Mon Sep 17 00:00:00 2001 From: SaveliyBorivets Date: Mon, 29 Jul 2024 15:15:37 +0300 Subject: [PATCH 3/4] iss39 constants.py invasion + refactoring --- commits_parser.py | 15 +++----- constants.py | 76 +++++++++++++++++++++++++++++++++++++++++ git_logger.py | 4 +-- invites_parser.py | 5 ++- issues_parser.py | 50 ++++++++------------------- pull_requests_parser.py | 58 ++++++++----------------------- wikipars.py | 27 +++++---------- 7 files changed, 122 insertions(+), 113 deletions(-) create mode 100644 constants.py diff --git a/commits_parser.py b/commits_parser.py index e7abf34..9901016 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -2,17 +2,11 @@ import pytz from time import sleep from github import Github, Repository, GithubException, PullRequest - -EMPTY_FIELD = 'Empty field' -TIMEDELTA = 0.05 -TIMEZONE = 'Europe/Moscow' -FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch', 'added lines', 'deleted lines') -FORKED_REPO = False -ORIG_REPO_COMMITS = [] +from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, FORKED_REPO, ORIG_REPO_COMMITS, COMMIT_FIELDNAMES def log_commit_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: - writer = csv.DictWriter(file, fieldnames=FIELDNAMES) + writer = csv.DictWriter(file, fieldnames=COMMIT_FIELDNAMES) writer.writerow(info) @@ -43,7 +37,7 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran nvl = lambda val: val or EMPTY_FIELD commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email), commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch, commit.stats.additions, commit.stats.deletions] - info = dict(zip(FIELDNAMES, commit_data)) + info = dict(zip(COMMIT_FIELDNAMES, commit_data)) if fork_flag: ORIG_REPO_COMMITS.append(info['commit id']) @@ -55,7 +49,7 @@ def
log_repository_commits(repository: Repository, csv_name, start, finish, bran def log_commits(client: Github, working_repos, csv_name, start, finish, branch, fork_flag): with open(csv_name, 'w', newline='') as file: writer = csv.writer(file) - writer.writerow(FIELDNAMES) + writer.writerow(COMMIT_FIELDNAMES) for repo in working_repos: @@ -68,5 +62,6 @@ def log_commits(client: Github, working_repos, csv_name, start, finish, branch, log_repository_commits(forked_repo, csv_name, start, finish, branch, FORKED_REPO) sleep(TIMEDELTA) sleep(TIMEDELTA) + ORIG_REPO_COMMITS.clear() except Exception as e: print(e) diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..e2b45ba --- /dev/null +++ b/constants.py @@ -0,0 +1,76 @@ +#For all +EMPTY_FIELD = 'Empty field' +TIMEDELTA = 0.05 +TIMEZONE = 'Europe/Moscow' + +#Fieldnames +REPO_NAME = 'repository name' +AUTHOR_NAME = 'author name' +AUTHOR_LOGIN = 'author login' +AUTHOR_EMAIL = 'author email' +DATE_AND_TIME = 'date and time' +CHANGED_FILES = 'changed files' +COMMIT_ID = 'commit id' +BRANCH = 'branch' +ADDED_LINES = 'added lines' +DELETED_LINES = 'deleted lines' +TITLE = 'title' +ID = 'id' +STATE = 'state' +COMMIT_INTO = 'commit into' +COMMIT_FROM = 'commit from' +CREATED_AT = 'created at' +CREATOR_NAME = 'creator name' +CREATOR_LOGIN = 'creator login' +CREATOR_EMAIL = 'creator email' +COMMENT_BODY = 'comment body' +COMMENT_CREATED_AT = 'comment created at' +COMMENT_AUTHOR_NAME = 'comment author name' +COMMENT_AUTHOR_LOGIN = 'comment author login' +COMMENT_AUTHOR_EMAIL = 'comment author email' +MERGER_NAME = 'merger name' +MERGER_LOGIN = 'merger login' +MERGER_EMAIL = 'merger email' +SOURCE_BRANCH = 'source branch' +TARGET_BRANCH = 'target branch' +ASSIGNEE_STORY = 'assignee story' +RELATED_ISSUES = 'related issues' +LABELS = 'labels' +MILESTONE = 'milestone' +NUMBER = 'number' +TASK = 'task' +CLOSER_NAME = 'closer name' +CLOSER_LOGIN = 'closer login' +CLOSER_EMAIL = 'closer email' +CLOSED_AT = 'closed at' 
+CONNECTED_PULL_REQUESTS = 'connected pull requests' +INVITED_LOGIN = 'invited login' +INVITE_CREATION_DATE = 'invite creation date' +INVITATION_URL = 'invitation url' +PAGE = 'page' +ACTION = 'action' +REVISION_ID = 'revision id' + +#For commits +FORKED_REPO = False +ORIG_REPO_COMMITS = [] +COMMIT_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, AUTHOR_EMAIL, DATE_AND_TIME, CHANGED_FILES, COMMIT_ID, BRANCH, ADDED_LINES, DELETED_LINES) + +#For pull requests +PULL_REQUEST_FIELDNAMES = (REPO_NAME, TITLE, ID, STATE, COMMIT_INTO, COMMIT_FROM, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL, + CHANGED_FILES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL, + MERGER_NAME, MERGER_LOGIN, MERGER_EMAIL, SOURCE_BRANCH, TARGET_BRANCH, ASSIGNEE_STORY, RELATED_ISSUES, LABELS, MILESTONE) + +#For issues +ISSUE_FIELDNAMES = (REPO_NAME, NUMBER, TITLE, STATE, TASK, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL, CLOSER_NAME, CLOSER_LOGIN, + CLOSER_EMAIL, CLOSED_AT, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL, + ASSIGNEE_STORY, CONNECTED_PULL_REQUESTS, LABELS, MILESTONE) + +#For invites +INVITE_FIELDNAMES = (REPO_NAME, INVITED_LOGIN, INVITE_CREATION_DATE, INVITATION_URL) + +#For wikis +EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" #Хэш пустого дерева для сравнения с первым коммитом. 
Способ был найден здесь: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython +ACTIVITY = {"A": "Страница добавлена", "M": "Страница изменена", "D": "Страница удалена", "R": "Страница переименована"} +ENG_ACTIVITY = {"A" : "Page added", "M" : "Page modified", "D" : "Page deleted", "R": "Page renamed"} +WIKI_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, DATE_AND_TIME, PAGE, ACTION, REVISION_ID, ADDED_LINES, DELETED_LINES) diff --git a/git_logger.py b/git_logger.py index aa85441..1fcecc2 100644 --- a/git_logger.py +++ b/git_logger.py @@ -1,8 +1,6 @@ from github import Github, GithubException, PullRequest from time import sleep - -TIMEDELTA = 0.05 -TIMEZONE = 'Europe/Moscow' +from constants import TIMEDELTA, TIMEZONE def login(token): client = Github(login_or_token=token) diff --git a/invites_parser.py b/invites_parser.py index 9540c52..32283d2 100644 --- a/invites_parser.py +++ b/invites_parser.py @@ -4,8 +4,7 @@ import json from time import sleep from github import Github, Repository, GithubException, PullRequest - -FIELDNAMES = ('repository name', 'invited login', 'invite creation date', 'invitation url') +from constants import INVITE_FIELDNAMES def log_inviter(repo, invite): invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url] @@ -16,7 +15,7 @@ def log_inviter(repo, invite): def log_invitations(client: Github, working_repos, csv_name): with open(csv_name, 'w', newline='') as file: writer = csv.writer(file) - writer.writerow(FIELDNAMES) + writer.writerow(INVITE_FIELDNAMES) for repo in working_repos: print('=' * 20, repo.full_name, '=' * 20) invitations = repo.get_pending_invitations() diff --git a/issues_parser.py b/issues_parser.py index df19b45..2ce4d5e 100644 --- a/issues_parser.py +++ b/issues_parser.py @@ -5,18 +5,11 @@ from time import sleep from git_logger import get_assignee_story from github import Github, Repository, GithubException, 
PullRequest - -EMPTY_FIELD = 'Empty field' -TIMEDELTA = 0.05 -TIMEZONE = 'Europe/Moscow' -FIELDNAMES = ('repository name', 'number', 'title', 'state', 'task', 'created at', 'creator name', 'creator login', - 'creator email', 'closer name', 'closer login', 'closer email', 'closed at', 'comment body', - 'comment created at', 'comment author name', 'comment author login', 'comment author email', - 'assignee story', 'connected pull requests', 'labels', 'milestone') +from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, ISSUE_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL def log_issue_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: - writer = csv.DictWriter(file, fieldnames=FIELDNAMES) + writer = csv.DictWriter(file, fieldnames=ISSUE_FIELDNAMES) writer.writerow(info) @@ -93,36 +86,21 @@ def log_repository_issues(repository: Repository, csv_name, token, start, finish continue nvl = lambda val: val or EMPTY_FIELD get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr) - info_tmp = { - 'repository name': repository.full_name, 'number': issue.number, 'title': issue.title, - 'state': issue.state, 'task': issue.body, - 'created at': issue.created_at, - 'creator name': get_info(issue.user, 'name'), - 'creator login': get_info(issue.user, 'login'), - 'creator email': get_info(issue.user, 'email'), - 'closed at': nvl(issue.closed_at), - 'closer name': get_info(issue.closed_by, 'name'), - 'closer login': get_info(issue.closed_by, 'login'), - 'closer email': get_info(issue.closed_by, 'email'), - 'comment body': EMPTY_FIELD, - 'comment created at': EMPTY_FIELD, - 'comment author name': EMPTY_FIELD, - 'comment author login': EMPTY_FIELD, - 'comment author email': EMPTY_FIELD, - 'assignee story': get_assignee_story(issue), - 'connected pull requests': EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token), - 
'labels': EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]), - 'milestone': get_info(issue.milestone, 'title') - } + issue_data = [repository.full_name, issue.number, issue.title, issue.state, issue.body, issue.created_at, get_info(issue.user, 'name'), + get_info(issue.user, 'login'), get_info(issue.user, 'email'), nvl(issue.closed_at), get_info(issue.closed_by, 'name'), + get_info(issue.closed_by, 'login'), get_info(issue.closed_by, 'email'), EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, + EMPTY_FIELD, get_assignee_story(issue), EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token), + EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]), get_info(issue.milestone, 'title')] + info_tmp = dict(zip(ISSUE_FIELDNAMES, issue_data)) if issue.get_comments().totalCount > 0: for comment in issue.get_comments(): info = info_tmp - info['comment body'] = comment.body - info['comment created at'] = comment.created_at - info['comment author name'] = comment.user.name - info['comment author login'] = comment.user.login - info['comment author email'] = comment.user.email + info[COMMENT_BODY] = comment.body + info[COMMENT_CREATED_AT] = comment.created_at + info[COMMENT_AUTHOR_NAME] = comment.user.name + info[COMMENT_AUTHOR_LOGIN] = comment.user.login + info[COMMENT_AUTHOR_EMAIL] = nvl(comment.user.email) log_issue_to_csv(info, csv_name) log_issue_to_stdout(info) else: @@ -134,7 +112,7 @@ def log_repository_issues(repository: Repository, csv_name, token, start, finish def log_issues(client: Github, working_repo, csv_name, token, start, finish, fork_flag): with open(csv_name, 'w', newline='') as file: writer = csv.writer(file) - writer.writerow(FIELDNAMES) + writer.writerow(ISSUE_FIELDNAMES) for repo in working_repo: try: diff --git a/pull_requests_parser.py b/pull_requests_parser.py index 5cb85bc..e35f603 100644 --- 
a/pull_requests_parser.py +++ b/pull_requests_parser.py @@ -5,15 +5,7 @@ from time import sleep from git_logger import get_assignee_story from github import Github, Repository, GithubException, PullRequest - -EMPTY_FIELD = 'Empty field' -TIMEDELTA = 0.05 -TIMEZONE = 'Europe/Moscow' -FIELDNAMES = ('repository name', 'title', 'id', 'state', 'commit into', 'commit from', 'created at', 'creator name', - 'creator login', 'creator email', 'changed files', 'comment body', - 'comment created at', 'comment author name', 'comment author login', - 'comment author email', 'merger name', 'merger login', 'merger email', 'source branch', - 'target branch', 'assignee story', 'related issues', 'labels', 'milestone') +from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, PULL_REQUEST_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL def log_pr_to_stdout(info): print(info) @@ -21,7 +13,7 @@ def log_pr_to_stdout(info): def log_pr_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: - writer = csv.DictWriter(file, fieldnames=FIELDNAMES) + writer = csv.DictWriter(file, fieldnames=PULL_REQUEST_FIELDNAMES) writer.writerow(info) @@ -78,42 +70,22 @@ def log_repositories_pr(repository: Repository, csv_name, token, start, finish): continue nvl = lambda val: val or EMPTY_FIELD get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr) - info_tmp = { - 'repository name': repository.full_name, - 'title': pull.title, - 'id': pull.number, - 'state': pull.state, - 'commit into': pull.base.label, - 'commit from': pull.head.label, - 'created at': pull.created_at, - 'creator name': nvl(pull.user.name), - 'creator login': pull.user.login, - 'creator email': pull.user.email, - 'changed files': '; '.join([file.filename for file in pull.get_files()]), - 'comment body': EMPTY_FIELD, - 'comment created at': EMPTY_FIELD, - 'comment author name': EMPTY_FIELD, - 'comment author login': EMPTY_FIELD, - 'comment 
author email': EMPTY_FIELD, - 'merger name': get_info(pull.merged_by, 'name'), - 'merger login': get_info(pull.merged_by, 'login'), - 'merger email': get_info(pull.merged_by, 'email'), - 'source branch': pull.head.ref, - 'target branch': pull.base.ref, - 'assignee story': get_assignee_story(pull), - 'related issues': EMPTY_FIELD if pull.issue_url is None else get_related_issues(pull.number, repository.owner, repository.name, token), - 'labels': EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]), - 'milestone': get_info(pull.milestone, 'title') - } + pr_data = [repository.full_name, pull.title, pull.number, pull.state, pull.base.label, pull.head.label, pull.created_at, + nvl(pull.user.name), pull.user.login, pull.user.email, '; '.join([file.filename for file in pull.get_files()]), + EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, get_info(pull.merged_by, 'name'), + get_info(pull.merged_by, 'login'), get_info(pull.merged_by, 'email'), pull.head.ref, pull.base.ref, + get_assignee_story(pull), EMPTY_FIELD if pull.issue_url is None else get_related_issues(pull.number, repository.owner, repository.name, token), + EMPTY_FIELD if pull.labels is None else ';'.join([label.name for label in pull.labels]), get_info(pull.milestone, 'title')] + info_tmp = dict(zip(PULL_REQUEST_FIELDNAMES, pr_data)) if pull.get_comments().totalCount > 0: for comment in pull.get_comments(): info = info_tmp - info['comment body'] = comment.body - info['comment created at'] = comment.created_at - info['comment author name'] = comment.user.name - info['comment author login'] = comment.user.login - info['comment author email'] = nvl(comment.user.email) + info[COMMENT_BODY] = comment.body + info[COMMENT_CREATED_AT] = comment.created_at + info[COMMENT_AUTHOR_NAME] = comment.user.name + info[COMMENT_AUTHOR_LOGIN] = comment.user.login + info[COMMENT_AUTHOR_EMAIL] = nvl(comment.user.email) log_pr_to_csv(info, csv_name) log_pr_to_stdout(info) else: @@ 
-125,7 +97,7 @@ def log_repositories_pr(repository: Repository, csv_name, token, start, finish): def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag): with open(csv_name, 'w', newline='') as file: writer = csv.writer(file) - writer.writerow(FIELDNAMES) + writer.writerow(PULL_REQUEST_FIELDNAMES) for repo in working_repos: try: diff --git a/wikipars.py b/wikipars.py index 0b8c5a0..4ce30c7 100644 --- a/wikipars.py +++ b/wikipars.py @@ -2,8 +2,7 @@ import os import time import csv - -WIKI_FIELDNAMES = ['repository name', 'author name', 'author login', 'datetime', 'page', 'action', 'revision id', 'added lines', 'deleted lines'] +from constants import WIKI_FIELDNAMES, EMPTY_FIELD, EMPTY_TREE_SHA, ACTIVITY, AUTHOR_LOGIN, PAGE, ACTION def log_wiki_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: @@ -48,30 +47,22 @@ def wikiparser(client, repositories, path_drepo, csv_name): print("=" * 20, name_rep, "=" * 20) #Вывод изменений - EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" #Хэш пустого дерева для сравнения с первым коммитом. 
Способ был найден здесь: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython wiki_commits = repo.iter_commits(all=True) - activity = {"A" : "Страница добавлена", "M" : "Страница изменена", "D" : "Страница удалена", "R":"Страница переименована"} - #eng_activity = {"A" : "Page added", "M" : "Page modified", "D" : "Page deleted", "R": "Page renamed"} for commit in wiki_commits: data_commit = dict() parent = commit.parents - data_commit["repository name"] = name_rep - data_commit["author name"] = commit.author + commit_data = [name_rep, commit.author, EMPTY_FIELD, time.strftime("%Y-%m-%d %H:%M:%S%z", time.gmtime(commit.committed_date)), + EMPTY_FIELD, commit, commit.stats.total["insertions"], commit.stats.total["deletions"]] + info = dict(zip(WIKI_FIELDNAMES, commit_data)) if commit.author.email and len(commit.author.email.split('+')) > 1: - data_commit["author login"] = commit.author.email.split('+')[1].split('@users')[0] - else: - data_commit["author login"] = "empty login" - data_commit["datetime"] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.gmtime(commit.committed_date)) + data_commit[AUTHOR_LOGIN] = commit.author.email.split('+')[1].split('@users')[0] if parent: - data_commit["page"] = ';'.join([diff.b_path for diff in parent[0].diff(commit)]) - data_commit["action"] = ';'.join([activity[diff.change_type] for diff in parent[0].diff(commit)]) + data_commit[PAGE] = ';'.join([diff.b_path for diff in parent[0].diff(commit)]) + data_commit[ACTION] = ';'.join([ACTIVITY[diff.change_type] for diff in parent[0].diff(commit)]) else: #Первый коммит - data_commit["page"] = ';'.join([diff.b_path for diff in commit.diff(EMPTY_TREE_SHA)]) - data_commit["action"] = ';'.join([activity["A"]]) - data_commit["revision id"] = commit - data_commit["added lines"] = commit.stats.total["insertions"] - data_commit["deleted lines"] = commit.stats.total["deletions"] + data_commit[PAGE] = ';'.join([diff.b_path for diff in 
commit.diff(EMPTY_TREE_SHA)]) + data_commit[ACTION] = ';'.join([ACTIVITY["A"]]) for fieldname in data_commit: print(fieldname, data_commit[fieldname], sep=': ') print("-" * 40) From e03f7664da59d7affcea407d9308a2d39ad3277f Mon Sep 17 00:00:00 2001 From: SaveliyBorivets Date: Wed, 18 Sep 2024 02:32:22 +0300 Subject: [PATCH 4/4] !comeback! - flake8 fixes, but without long lines, lambda and one bare except --- commits_parser.py | 10 +++++----- constants.py | 25 +++++++++++++------------ export_sheets.py | 3 ++- git_logger.py | 3 ++- invites_parser.py | 14 ++++++-------- issues_parser.py | 7 ++++--- main.py | 6 ++++-- pull_requests_parser.py | 3 ++- wikipars.py | 34 +++++++++++++++++----------------- 9 files changed, 55 insertions(+), 50 deletions(-) diff --git a/commits_parser.py b/commits_parser.py index 9901016..0376d84 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -1,8 +1,10 @@ import csv import pytz from time import sleep -from github import Github, Repository, GithubException, PullRequest -from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, FORKED_REPO, ORIG_REPO_COMMITS, COMMIT_FIELDNAMES +from github import Github, Repository +from constants import (EMPTY_FIELD, TIMEDELTA, TIMEZONE, + FORKED_REPO, ORIG_REPO_COMMITS, COMMIT_FIELDNAMES) + def log_commit_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: @@ -30,8 +32,7 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran # TODO add support of since and until in https://pygithub.readthedocs.io/en/stable/github_objects/Repository.html#github.Repository.Repository.get_commits for commit in repository.get_commits(sha=branch): if commit.commit.author.date.astimezone( - pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone( - pytz.timezone(TIMEZONE)) > finish: + pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone(pytz.timezone(TIMEZONE)) > finish: continue if commit.commit is not None and commit.commit.sha not in 
ORIG_REPO_COMMITS: nvl = lambda val: val or EMPTY_FIELD @@ -51,7 +52,6 @@ def log_commits(client: Github, working_repos, csv_name, start, finish, branch, writer = csv.writer(file) writer.writerow(COMMIT_FIELDNAMES) - for repo in working_repos: try: print('=' * 20, repo.full_name, '=' * 20) diff --git a/constants.py b/constants.py index e2b45ba..364f96b 100644 --- a/constants.py +++ b/constants.py @@ -1,9 +1,9 @@ -#For all +# For all EMPTY_FIELD = 'Empty field' TIMEDELTA = 0.05 TIMEZONE = 'Europe/Moscow' -#Fieldnames +# Fieldnames REPO_NAME = 'repository name' AUTHOR_NAME = 'author name' AUTHOR_LOGIN = 'author login' @@ -51,26 +51,27 @@ ACTION = 'action' REVISION_ID = 'revision id' -#For commits +# For commits FORKED_REPO = False ORIG_REPO_COMMITS = [] COMMIT_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, AUTHOR_EMAIL, DATE_AND_TIME, CHANGED_FILES, COMMIT_ID, BRANCH, ADDED_LINES, DELETED_LINES) -#For pull requests +# For pull requests PULL_REQUEST_FIELDNAMES = (REPO_NAME, TITLE, ID, STATE, COMMIT_INTO, COMMIT_FROM, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL, CHANGED_FILES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL, MERGER_NAME, MERGER_LOGIN, MERGER_EMAIL, SOURCE_BRANCH, TARGET_BRANCH, ASSIGNEE_STORY, RELATED_ISSUES, LABELS, MILESTONE) -#For issues -ISSUE_FIELDNAMES = (REPO_NAME, NUMBER, TITLE, STATE, TASK, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL, CLOSER_NAME, CLOSER_LOGIN, - CLOSER_EMAIL, CLOSED_AT, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL, - ASSIGNEE_STORY, CONNECTED_PULL_REQUESTS, LABELS, MILESTONE) +# For issues +ISSUE_FIELDNAMES = (REPO_NAME, NUMBER, TITLE, STATE, TASK, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, + CREATOR_EMAIL, CLOSER_NAME, CLOSER_LOGIN, CLOSER_EMAIL, CLOSED_AT, COMMENT_BODY, + COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL, + ASSIGNEE_STORY, CONNECTED_PULL_REQUESTS, 
LABELS, MILESTONE) -#For invites +# For invites INVITE_FIELDNAMES = (REPO_NAME, INVITED_LOGIN, INVITE_CREATION_DATE, INVITATION_URL) -#For wikis -EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" #Хэш пустого дерева для сравнения с первым коммитом. Способ был найден здесь: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython +# For wikis +EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" # Хэш пустого дерева для сравнения с первым коммитом. Способ был найден здесь: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython ACTIVITY = {"A": "Страница добавлена", "M": "Страница изменена", "D": "Страница удалена", "R": "Страница переименована"} -ENG_ACTIVITY = {"A" : "Page added", "M" : "Page modified", "D" : "Page deleted", "R": "Page renamed"} +ENG_ACTIVITY = {"A": "Page added", "M": "Page modified", "D": "Page deleted", "R": "Page renamed"} WIKI_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, DATE_AND_TIME, PAGE, ACTION, REVISION_ID, ADDED_LINES, DELETED_LINES) diff --git a/export_sheets.py b/export_sheets.py index 8e5c16e..884574c 100644 --- a/export_sheets.py +++ b/export_sheets.py @@ -7,8 +7,9 @@ "what?": 3 }] + def write_data_to_table(csv_path, google_token, table_id, sheet_id): - if google_token and sheet_id and table_id : + if google_token and sheet_id and table_id: gc = pygsheets.authorize(service_file=google_token) sh = gc.open_by_key(table_id) diff --git a/git_logger.py b/git_logger.py index 1fcecc2..5473f70 100644 --- a/git_logger.py +++ b/git_logger.py @@ -1,6 +1,7 @@ from github import Github, GithubException, PullRequest from time import sleep -from constants import TIMEDELTA, TIMEZONE +from constants import TIMEDELTA + def login(token): client = Github(login_or_token=token) diff --git a/invites_parser.py b/invites_parser.py index 32283d2..2d72da9 100644 --- a/invites_parser.py +++ b/invites_parser.py @@ -1,12 +1,10 @@ import csv -import pytz -import 
requests -import json from time import sleep -from github import Github, Repository, GithubException, PullRequest -from constants import INVITE_FIELDNAMES +from github import Github +from constants import INVITE_FIELDNAMES, TIMEDELTA -def log_inviter(repo, invite): + +def log_inviter(repo, invite, writer): invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url] writer.writerow(invite_info) print(invite_info) @@ -21,7 +19,7 @@ def log_invitations(client: Github, working_repos, csv_name): invitations = repo.get_pending_invitations() for invite in invitations: try: - log_inviter(repo, invite) - sleep(timedelta) + log_inviter(repo, invite, writer) + sleep(TIMEDELTA) except Exception as e: print(e) diff --git a/issues_parser.py b/issues_parser.py index 2ce4d5e..654ff84 100644 --- a/issues_parser.py +++ b/issues_parser.py @@ -4,9 +4,10 @@ import json from time import sleep from git_logger import get_assignee_story -from github import Github, Repository, GithubException, PullRequest +from github import Github, Repository from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, ISSUE_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL + def log_issue_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: writer = csv.DictWriter(file, fieldnames=ISSUE_FIELDNAMES) @@ -67,10 +68,10 @@ def get_connected_pulls(issue_number, repo_owner, repo_name, token): if (pull_request_data is not None): issues_data = pull_request_data["timelineItems"]["nodes"] for pulls in issues_data: - if (pulls.get("CrossReferencedEvent") != None and pulls.get("CrossReferencedEvent").get( + if (pulls.get("CrossReferencedEvent") is not None and pulls.get("CrossReferencedEvent").get( "url") not in list_url): list_url.append(pulls.get("CrossReferencedEvent").get("url")) - if (pulls.get("ConnectedEvent") != None and pulls.get("ConnectedEvent").get("url") not in list_url): + 
if (pulls.get("ConnectedEvent") is not None and pulls.get("ConnectedEvent").get("url") not in list_url): list_url.append(pulls.get("ConnectedEvent").get("url")) if (list_url == []): return 'Empty field' diff --git a/main.py b/main.py index e4c435a..6616ca9 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,8 @@ import issues_parser import invites_parser import wikipars +from constants import TIMEZONE + def parse_args(): parser = argparse.ArgumentParser() @@ -32,7 +34,7 @@ def parse_args(): parser.add_argument('--sheet_id', type=str, required=False, help='Specify title for a sheet in a document in which data will be printed') args = parser.parse_args() - + if args.export_google_sheets: for action in parser._actions: if action.dest == 'google_token': @@ -50,7 +52,7 @@ def parse_time(datetime_str): start = [int(i) for i in start] start_datetime = datetime(year=start[0], month=start[1], day=start[2], hour=start[3], minute=start[4], second=start[5]) - return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE)) + return start_datetime.astimezone(pytz.timezone(TIMEZONE)) def main(): diff --git a/pull_requests_parser.py b/pull_requests_parser.py index e35f603..71b154b 100644 --- a/pull_requests_parser.py +++ b/pull_requests_parser.py @@ -4,9 +4,10 @@ import json from time import sleep from git_logger import get_assignee_story -from github import Github, Repository, GithubException, PullRequest +from github import Github, Repository from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, PULL_REQUEST_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL + def log_pr_to_stdout(info): print(info) diff --git a/wikipars.py b/wikipars.py index 4ce30c7..7a37285 100644 --- a/wikipars.py +++ b/wikipars.py @@ -4,6 +4,7 @@ import csv from constants import WIKI_FIELDNAMES, EMPTY_FIELD, EMPTY_TREE_SHA, ACTIVITY, AUTHOR_LOGIN, PAGE, ACTION + def log_wiki_to_csv(info, csv_name): with open(csv_name, 'a', newline='') as file: 
writer = csv.DictWriter(file, fieldnames=WIKI_FIELDNAMES) @@ -15,17 +16,17 @@ def wikiparser(client, repositories, path_drepo, csv_name): writer = csv.writer(file) writer.writerow(WIKI_FIELDNAMES) - #Создаем список репозиториев из файла + # Создаем список репозиториев из файла with open(repositories, 'r') as file: list_repos = [x for x in file.read().split('\n') if x] error_repos = [] data_changes = [] for name_rep in list_repos: - #Проверяем, есть ли репозиторий в папке + # Проверяем, есть ли репозиторий в папке dir_path = path_drepo + "/" + name_rep if os.path.exists(dir_path): - #Обновляем репозиторий + # Обновляем репозиторий if len(os.listdir(dir_path)) > 0: repo = Repo(dir_path) repo.remotes.origin.pull() @@ -34,7 +35,7 @@ def wikiparser(client, repositories, path_drepo, csv_name): error_repos.append(name_rep) continue else: - #Клонируем репозиторий в папку + # Клонируем репозиторий в папку dir_path = path_drepo + "/" + name_rep os.makedirs(dir_path, exist_ok=True) repo_url = f"git@github.com:{name_rep}.wiki.git" @@ -46,30 +47,29 @@ def wikiparser(client, repositories, path_drepo, csv_name): continue print("=" * 20, name_rep, "=" * 20) - #Вывод изменений + # Вывод изменений wiki_commits = repo.iter_commits(all=True) for commit in wiki_commits: - data_commit = dict() parent = commit.parents commit_data = [name_rep, commit.author, EMPTY_FIELD, time.strftime("%Y-%m-%d %H:%M:%S%z", time.gmtime(commit.committed_date)), EMPTY_FIELD, commit, commit.stats.total["insertions"], commit.stats.total["deletions"]] info = dict(zip(WIKI_FIELDNAMES, commit_data)) if commit.author.email and len(commit.author.email.split('+')) > 1: - data_commit[AUTHOR_LOGIN] = commit.author.email.split('+')[1].split('@users')[0] + info[AUTHOR_LOGIN] = commit.author.email.split('+')[1].split('@users')[0] if parent: - data_commit[PAGE] = ';'.join([diff.b_path for diff in parent[0].diff(commit)]) - data_commit[ACTION] = ';'.join([ACTIVITY[diff.change_type] for diff in parent[0].diff(commit)]) + 
info[PAGE] = ';'.join([diff.b_path for diff in parent[0].diff(commit)]) + info[ACTION] = ';'.join([ACTIVITY[diff.change_type] for diff in parent[0].diff(commit)]) else: - #Первый коммит - data_commit[PAGE] = ';'.join([diff.b_path for diff in commit.diff(EMPTY_TREE_SHA)]) - data_commit[ACTION] = ';'.join([ACTIVITY["A"]]) - for fieldname in data_commit: - print(fieldname, data_commit[fieldname], sep=': ') + # Первый коммит + info[PAGE] = ';'.join([diff.b_path for diff in commit.diff(EMPTY_TREE_SHA)]) + info[ACTION] = ';'.join([ACTIVITY["A"]]) + for fieldname in info: + print(fieldname, info[fieldname], sep=': ') print("-" * 40) - log_wiki_to_csv(data_commit, csv_name) - data_changes.append(data_commit) + log_wiki_to_csv(info, csv_name) + data_changes.append(info) - #Вывод репозиториев, с которыми возникли ошибки + # Вывод репозиториев, с которыми возникли ошибки if error_repos: print("!=====Проблемные репозитории=====!") for rep in error_repos: