Linter, tests, submodules, and more #54

Open · wants to merge 4 commits into master
31 changes: 15 additions & 16 deletions commits_parser.py
@@ -1,24 +1,22 @@
 import csv
 import pytz
 from time import sleep
-from github import Github, Repository, GithubException, PullRequest
+from github import Github, Repository
+from constants import (EMPTY_FIELD, TIMEDELTA, TIMEZONE,
+                       FORKED_REPO, ORIG_REPO_COMMITS, COMMIT_FIELDNAMES)

-EMPTY_FIELD = 'Empty field'
-TIMEDELTA = 0.05
-TIMEZONE = 'Europe/Moscow'
-FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch')

 def log_commit_to_csv(info, csv_name):
     with open(csv_name, 'a', newline='') as file:
-        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer = csv.DictWriter(file, fieldnames=COMMIT_FIELDNAMES)
         writer.writerow(info)


 def log_commit_to_stdout(info):
     print(info)


-def log_repository_commits(repository: Repository, csv_name, start, finish, branch):
+def log_repository_commits(repository: Repository, csv_name, start, finish, branch, fork_flag):
     branches = []
     match branch:
         case 'all':
@@ -34,14 +32,15 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, bran
     # TODO add support of since and until in https://pygithub.readthedocs.io/en/stable/github_objects/Repository.html#github.Repository.Repository.get_commits
     for commit in repository.get_commits(sha=branch):
         if commit.commit.author.date.astimezone(
-                pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone(
-                pytz.timezone(TIMEZONE)) > finish:
+                pytz.timezone(TIMEZONE)) < start or commit.commit.author.date.astimezone(pytz.timezone(TIMEZONE)) > finish:
             continue
-        if commit.commit is not None:
+        if commit.commit is not None and commit.commit.sha not in ORIG_REPO_COMMITS:
             nvl = lambda val: val or EMPTY_FIELD
             commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email),
-                           commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch]
-            info = dict(zip(FIELDNAMES, commit_data))
+                           commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch, commit.stats.additions, commit.stats.deletions]
+            info = dict(zip(COMMIT_FIELDNAMES, commit_data))
+            if fork_flag:
+                ORIG_REPO_COMMITS.append(info['commit id'])
Collaborator: ALL CAPS normally means a constant, yet here we append to it and later clear it?)

Collaborator (Author): I was planning to rework how the list of repos is fetched anyway; in git_logger I'll add a function that returns a dictionary with repos as keys and lists of their forks as values.
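A rough sketch of that helper (the name is hypothetical; it relies on PyGithub's Github.get_repo() and Repository.get_forks()):

from time import sleep
from github import Github
from constants import TIMEDELTA

def get_repos_with_forks(client: Github, repo_names):
    # Hypothetical helper: map each repository to the list of its forks.
    repos = {}
    for name in repo_names:
        repo = client.get_repo(name)
        repos[repo] = list(repo.get_forks())
        sleep(TIMEDELTA)  # stay under the GitHub API rate limit
    return repos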

Collaborator (Author): Ah, that's not it. In any case, I'll fix it)
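One way to avoid mutating a module-level "constant" at all (a sketch, not what the PR currently does) is to pass the set of already-logged commit SHAs through as a parameter:

def log_repository_commits(repository, csv_name, start, finish, branch,
                           seen_shas=None):
    # seen_shas collects SHAs already logged from the original repository,
    # so fork traversal can skip inherited commits without global state.
    seen_shas = set() if seen_shas is None else seen_shas
    for commit in repository.get_commits(sha=branch):
        if commit.commit.sha in seen_shas:
            continue
        seen_shas.add(commit.commit.sha)
        # ... build the row and write it, as in the diff above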


             log_commit_to_csv(info, csv_name)
             log_commit_to_stdout(info)
@@ -51,18 +50,18 @@ def log_commits(client: Github, working_repos, csv_name, start, finish, branch, fork_flag):
 def log_commits(client: Github, working_repos, csv_name, start, finish, branch, fork_flag):
     with open(csv_name, 'w', newline='') as file:
         writer = csv.writer(file)
-        writer.writerow(FIELDNAMES)
-
+        writer.writerow(COMMIT_FIELDNAMES)
+
     for repo in working_repos:
         try:
             print('=' * 20, repo.full_name, '=' * 20)
-            log_repository_commits(repo, csv_name, start, finish, branch)
+            log_repository_commits(repo, csv_name, start, finish, branch, fork_flag)
             if fork_flag:
                 for forked_repo in repo.get_forks():
                     print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
-                    log_repository_commits(forked_repo, csv_name, start, finish, branch)
+                    log_repository_commits(forked_repo, csv_name, start, finish, branch, FORKED_REPO)
                     sleep(TIMEDELTA)
             sleep(TIMEDELTA)
+            ORIG_REPO_COMMITS.clear()
         except Exception as e:
             print(e)
77 changes: 77 additions & 0 deletions constants.py
@@ -0,0 +1,77 @@
# For all
EMPTY_FIELD = 'Empty field'
TIMEDELTA = 0.05
TIMEZONE = 'Europe/Moscow'

# Fieldnames
REPO_NAME = 'repository name'
AUTHOR_NAME = 'author name'
AUTHOR_LOGIN = 'author login'
AUTHOR_EMAIL = 'author email'
DATE_AND_TIME = 'date and time'
CHANGED_FILES = 'changed files'
COMMIT_ID = 'commit id'
BRANCH = 'branch'
ADDED_LINES = 'added lines'
DELETED_LINES = 'deleted lines'
TITLE = 'title'
ID = 'id'
STATE = 'state'
COMMIT_INTO = 'commit into'
COMMIT_FROM = 'commit from'
CREATED_AT = 'created at'
CREATOR_NAME = 'creator name'
CREATOR_LOGIN = 'creator login'
CREATOR_EMAIL = 'creator email'
COMMENT_BODY = 'comment body'
COMMENT_CREATED_AT = 'comment created at'
COMMENT_AUTHOR_NAME = 'comment author name'
COMMENT_AUTHOR_LOGIN = 'comment author login'
COMMENT_AUTHOR_EMAIL = 'comment author email'
MERGER_NAME = 'merger name'
MERGER_LOGIN = 'merger login'
MERGER_EMAIL = 'merger email'
SOURCE_BRANCH = 'source branch'
TARGET_BRANCH = 'target branch'
ASSIGNEE_STORY = 'assignee story'
RELATED_ISSUES = 'related issues'
LABELS = 'labels'
MILESTONE = 'milestone'
NUMBER = 'number'
TASK = 'task'
CLOSER_NAME = 'closer name'
CLOSER_LOGIN = 'closer login'
CLOSER_EMAIL = 'closer email'
CLOSED_AT = 'closed at'
CONNECTED_PULL_REQUESTS = 'connected pull requests'
INVITED_LOGIN = 'invited login'
INVITE_CREATION_DATE = 'invite creation date'
INVITATION_URL = 'invitation url'
PAGE = 'page'
ACTION = 'action'
REVISION_ID = 'revision id'

# For commits
FORKED_REPO = False
ORIG_REPO_COMMITS = []
COMMIT_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, AUTHOR_EMAIL, DATE_AND_TIME, CHANGED_FILES, COMMIT_ID, BRANCH, ADDED_LINES, DELETED_LINES)

# For pull requests
PULL_REQUEST_FIELDNAMES = (REPO_NAME, TITLE, ID, STATE, COMMIT_INTO, COMMIT_FROM, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN, CREATOR_EMAIL,
CHANGED_FILES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL,
MERGER_NAME, MERGER_LOGIN, MERGER_EMAIL, SOURCE_BRANCH, TARGET_BRANCH, ASSIGNEE_STORY, RELATED_ISSUES, LABELS, MILESTONE)

# For issues
ISSUE_FIELDNAMES = (REPO_NAME, NUMBER, TITLE, STATE, TASK, CREATED_AT, CREATOR_NAME, CREATOR_LOGIN,
CREATOR_EMAIL, CLOSER_NAME, CLOSER_LOGIN, CLOSER_EMAIL, CLOSED_AT, COMMENT_BODY,
COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL,
ASSIGNEE_STORY, CONNECTED_PULL_REQUESTS, LABELS, MILESTONE)

# For invites
INVITE_FIELDNAMES = (REPO_NAME, INVITED_LOGIN, INVITE_CREATION_DATE, INVITATION_URL)

# For wikis
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"  # Hash of git's empty tree, used to diff against the very first commit. The approach was found here: https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython
ACTIVITY = {"A": "Страница добавлена", "M": "Страница изменена", "D": "Страница удалена", "R": "Страница переименована"}
ENG_ACTIVITY = {"A": "Page added", "M": "Page modified", "D": "Page deleted", "R": "Page renamed"}
WIKI_FIELDNAMES = (REPO_NAME, AUTHOR_NAME, AUTHOR_LOGIN, DATE_AND_TIME, PAGE, ACTION, REVISION_ID, ADDED_LINES, DELETED_LINES)
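For reference, the empty-tree trick from the comment above can be exercised with GitPython like this (a sketch; the clone path is a placeholder):

from git import Repo

EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

repo = Repo("/path/to/wiki/clone")  # placeholder path to a local clone
first = list(repo.iter_commits())[-1]  # the oldest commit has no parent to diff against
# git diff --numstat <empty tree> <first commit>: added/deleted line counts per file
print(repo.git.diff("--numstat", EMPTY_TREE_SHA, first.hexsha))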
3 changes: 2 additions & 1 deletion export_sheets.py
@@ -7,8 +7,9 @@
"what?": 3
}]


def write_data_to_table(csv_path, google_token, table_id, sheet_id):
if google_token and sheet_id and table_id :
if google_token and sheet_id and table_id:
gc = pygsheets.authorize(service_file=google_token)
sh = gc.open_by_key(table_id)

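A hypothetical invocation of write_data_to_table (all four argument values are placeholders):

write_data_to_table(
    csv_path='commits.csv',            # CSV produced by one of the parsers
    google_token='service_file.json',  # pygsheets service-account credentials
    table_id='1AbC...xyz',             # spreadsheet key taken from its URL
    sheet_id='commits',                # title of the target sheet
)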
3 changes: 1 addition & 2 deletions git_logger.py
@@ -1,8 +1,7 @@
 from github import Github, GithubException, PullRequest
 from time import sleep
+from constants import TIMEDELTA

-TIMEDELTA = 0.05
-TIMEZONE = 'Europe/Moscow'

 def login(token):
     client = Github(login_or_token=token)
15 changes: 6 additions & 9 deletions invites_parser.py
@@ -1,13 +1,10 @@
 import csv
-import pytz
-import requests
-import json
 from time import sleep
-from github import Github, Repository, GithubException, PullRequest
+from github import Github
+from constants import INVITE_FIELDNAMES, TIMEDELTA

-FIELDNAMES = ('repository name', 'invited login', 'invite creation date', 'invitation url')

-def log_inviter(repo, invite):
+def log_inviter(repo, invite, writer):
     invite_info = [repo.full_name, invite.invitee.login, invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), invite.html_url]
     writer.writerow(invite_info)
     print(invite_info)
@@ -16,13 +13,13 @@ def log_inviter(repo, invite):
 def log_invitations(client: Github, working_repos, csv_name):
     with open(csv_name, 'w', newline='') as file:
         writer = csv.writer(file)
-        writer.writerow(FIELDNAMES)
+        writer.writerow(INVITE_FIELDNAMES)
         for repo in working_repos:
             print('=' * 20, repo.full_name, '=' * 20)
             invitations = repo.get_pending_invitations()
             for invite in invitations:
                 try:
-                    log_inviter(repo, invite)
-                    sleep(timedelta)
+                    log_inviter(repo, invite, writer)
+                    sleep(TIMEDELTA)
                 except Exception as e:
                     print(e)
55 changes: 17 additions & 38 deletions issues_parser.py
@@ -4,19 +4,13 @@
 import json
 from time import sleep
 from git_logger import get_assignee_story
-from github import Github, Repository, GithubException, PullRequest
+from github import Github, Repository
+from constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE, ISSUE_FIELDNAMES, COMMENT_BODY, COMMENT_CREATED_AT, COMMENT_AUTHOR_NAME, COMMENT_AUTHOR_LOGIN, COMMENT_AUTHOR_EMAIL

-EMPTY_FIELD = 'Empty field'
-TIMEDELTA = 0.05
-TIMEZONE = 'Europe/Moscow'
-FIELDNAMES = ('repository name', 'number', 'title', 'state', 'task', 'created at', 'creator name', 'creator login',
-              'creator email', 'closer name', 'closer login', 'closer email', 'closed at', 'comment body',
-              'comment created at', 'comment author name', 'comment author login', 'comment author email',
-              'assignee story', 'connected pull requests', 'labels', 'milestone')

 def log_issue_to_csv(info, csv_name):
     with open(csv_name, 'a', newline='') as file:
-        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
+        writer = csv.DictWriter(file, fieldnames=ISSUE_FIELDNAMES)
         writer.writerow(info)

@@ -74,10 +68,10 @@ def get_connected_pulls(issue_number, repo_owner, repo_name, token):
     if (pull_request_data is not None):
         issues_data = pull_request_data["timelineItems"]["nodes"]
         for pulls in issues_data:
-            if (pulls.get("CrossReferencedEvent") != None and pulls.get("CrossReferencedEvent").get(
+            if (pulls.get("CrossReferencedEvent") is not None and pulls.get("CrossReferencedEvent").get(
                     "url") not in list_url):
                 list_url.append(pulls.get("CrossReferencedEvent").get("url"))
-            if (pulls.get("ConnectedEvent") != None and pulls.get("ConnectedEvent").get("url") not in list_url):
+            if (pulls.get("ConnectedEvent") is not None and pulls.get("ConnectedEvent").get("url") not in list_url):
                 list_url.append(pulls.get("ConnectedEvent").get("url"))
     if (list_url == []):
         return 'Empty field'
@@ -93,36 +87,21 @@ def log_repository_issues(repository: Repository, csv_name, token, start, finish
             continue
         nvl = lambda val: val or EMPTY_FIELD
         get_info = lambda obj, attr: EMPTY_FIELD if obj is None else getattr(obj, attr)
-        info_tmp = {
-            'repository name': repository.full_name, 'number': issue.number, 'title': issue.title,
-            'state': issue.state, 'task': issue.body,
-            'created at': issue.created_at,
-            'creator name': get_info(issue.user, 'name'),
-            'creator login': get_info(issue.user, 'login'),
-            'creator email': get_info(issue.user, 'email'),
-            'closed at': nvl(issue.closed_at),
-            'closer name': get_info(issue.closed_by, 'name'),
-            'closer login': get_info(issue.closed_by, 'login'),
-            'closer email': get_info(issue.closed_by, 'email'),
-            'comment body': EMPTY_FIELD,
-            'comment created at': EMPTY_FIELD,
-            'comment author name': EMPTY_FIELD,
-            'comment author login': EMPTY_FIELD,
-            'comment author email': EMPTY_FIELD,
-            'assignee story': get_assignee_story(issue),
-            'connected pull requests': EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token),
-            'labels': EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]),
-            'milestone': get_info(issue.milestone, 'title')
-        }
+        # NOTE: issue_data must follow the order of ISSUE_FIELDNAMES exactly;
+        # closer name/login/email come before 'closed at' in the fieldnames.
+        issue_data = [repository.full_name, issue.number, issue.title, issue.state, issue.body, issue.created_at, get_info(issue.user, 'name'),
+                      get_info(issue.user, 'login'), get_info(issue.user, 'email'), get_info(issue.closed_by, 'name'), get_info(issue.closed_by, 'login'),
+                      get_info(issue.closed_by, 'email'), nvl(issue.closed_at), EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD, EMPTY_FIELD,
+                      EMPTY_FIELD, get_assignee_story(issue), EMPTY_FIELD if issue.number is None else get_connected_pulls(issue.number, repository.owner, repository.name, token),
+                      EMPTY_FIELD if issue.labels is None else ';'.join([label.name for label in issue.labels]), get_info(issue.milestone, 'title')]
+        info_tmp = dict(zip(ISSUE_FIELDNAMES, issue_data))
Comment on lines +90 to +95
Collaborator: This needs some thought.

Collaborator: Let's try looking at dataclasses:
https://stackoverflow.com/questions/72604922/how-to-convert-python-dataclass-to-dictionary-of-string-literal
I haven't tried it myself, but it seems like this approach should help.

Collaborator:
from dataclasses import dataclass, asdict

@dataclass(kw_only=True, frozen=True)
class A:
    field1: str
    field2: str

a = A(field1='qwe', field2='asd')
print(a, asdict(a))
# --> A(field1='qwe', field2='asd') {'field1': 'qwe', 'field2': 'asd'}

Collaborator (Author): Got it, I'll do it that way.
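Applied to this parser, the suggestion might look like this (a sketch; the row type and field set are illustrative, not part of the PR):

from dataclasses import dataclass, asdict

@dataclass(kw_only=True, frozen=True)
class IssueRow:
    # One attribute per CSV column keeps names and values paired
    # by construction instead of by the order of a zip().
    repository_name: str
    number: int
    title: str
    state: str

row = IssueRow(repository_name='org/repo', number=54, title='Linter, tests', state='open')
print(asdict(row))
# {'repository_name': 'org/repo', 'number': 54, 'title': 'Linter, tests', 'state': 'open'}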


         if issue.get_comments().totalCount > 0:
             for comment in issue.get_comments():
                 info = info_tmp
-                info['comment body'] = comment.body
-                info['comment created at'] = comment.created_at
-                info['comment author name'] = comment.user.name
-                info['comment author login'] = comment.user.login
-                info['comment author email'] = comment.user.email
+                info[COMMENT_BODY] = comment.body
+                info[COMMENT_CREATED_AT] = comment.created_at
+                info[COMMENT_AUTHOR_NAME] = comment.user.name
+                info[COMMENT_AUTHOR_LOGIN] = comment.user.login
+                info[COMMENT_AUTHOR_EMAIL] = nvl(comment.user.email)
                 log_issue_to_csv(info, csv_name)
                 log_issue_to_stdout(info)
         else:
@@ -134,7 +113,7 @@ def log_issues(client: Github, working_repo, csv_name, token, start, finish, fork_flag):
 def log_issues(client: Github, working_repo, csv_name, token, start, finish, fork_flag):
     with open(csv_name, 'w', newline='') as file:
         writer = csv.writer(file)
-        writer.writerow(FIELDNAMES)
+        writer.writerow(ISSUE_FIELDNAMES)

     for repo in working_repo:
         try:
6 changes: 4 additions & 2 deletions main.py
@@ -9,6 +9,8 @@
 import issues_parser
 import invites_parser
 import wikipars
+from constants import TIMEZONE


 def parse_args():
     parser = argparse.ArgumentParser()
@@ -32,7 +34,7 @@ def parse_args():
     parser.add_argument('--sheet_id', type=str, required=False,
                         help='Specify title for a sheet in a document in which data will be printed')
     args = parser.parse_args()
-
+
     if args.export_google_sheets:
         for action in parser._actions:
             if action.dest == 'google_token':
@@ -50,7 +52,7 @@ def parse_time(datetime_str):
     start = [int(i) for i in start]
     start_datetime = datetime(year=start[0], month=start[1], day=start[2], hour=start[3], minute=start[4],
                               second=start[5])
-    return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE))
+    return start_datetime.astimezone(pytz.timezone(TIMEZONE))


 def main():