forked from adam-mcdaniel/blame-tracker
-
Notifications
You must be signed in to change notification settings - Fork 1
/
blame-tracker.py
executable file
·490 lines (401 loc) · 20.1 KB
/
blame-tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
#!/usr/bin/env python3
r"""
_ _ _ _
| |__| |__ _ _ __ ___ ___| |_ _ _ __ _ __| |_____ _ _
| '_ \ / _` | ' \/ -_)___| _| '_/ _` / _| / / -_) '_|
|_.__/_\__,_|_|_|_\___| \__|_| \__,_\__|_\_\___|_|
Author: Adam McDaniel
File: blame-tracker.py
Date: 01/25/2022
Description: This script accuses the author of commits of all files between two dates in ISO 8601 format.
License: GNU General Public License v3.0
"""
##################################################
# Import libraries
##################################################
import subprocess # For running git commands
import glob # For finding files
import argparse # For reading command line arguments
import re # For extracting the information from the `git blame` output
from sys import stdout, stderr # For printing to stderr
from os.path import basename, abspath # For getting the filename from a path
from datetime import datetime, timedelta # For parsing dates
##################################################
# Constants
##################################################
# Name the program reports itself under in info/warning/error messages and
# in the `--help` output; derived from this script's own file name.
PROGRAM_NAME = basename(__file__)

# Layout used for each printed accusation unless the user passes `--format`.
# Available fields: name, path, author, date, time, content.
DEFAULT_FORMAT = "{name:12} ({author} on {date} at {time}): {content}"
##################################################
# Main
##################################################
def _build_argument_parser() -> argparse.ArgumentParser:
    '''
    Creates the command line argument parser.

    Returns
    -------
    argparse.ArgumentParser
        The parser describing every option this program accepts.
    '''
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME,
        description='Adam McDaniel <adam-mcdaniel.net> -- This script accuses an author of commits of all files between two dates in ISO 8601 format.')
    parser.add_argument('-r', '--repo', type=str, help='The directory📁 to search for files in (default is ".").', default='./')
    parser.add_argument('-by', '--author', type=str, help='The name📛 of the author to blame', nargs='+', default=[])
    parser.add_argument('-t0', '--since', dest='start_date', type=str, help='The start date📅 in ISO 8601 format.')
    parser.add_argument('-t1', '--until', dest='end_date', type=str, help='The end date📅 in ISO 8601 format (default is now).', default=datetime.now().isoformat())
    parser.add_argument('-d', '--days-ago', type=int, help='The number of days ago⏱️ to shift the search.', default=0)
    parser.add_argument('-w', '--weeks-ago', type=int, help='The number of weeks ago⏱️ to shift the search.', default=0)
    parser.add_argument('-m', '--minutes-ago', type=int, help='The number of minutes ago⏱️ to shift the search.', default=0)
    parser.add_argument('-in', '--include', dest='include', type=str, help='The file patterns📂 to search for (default is "**/*").', default=['**/*'], nargs='+')
    parser.add_argument('-ex', '--exclude', type=str, help='The file patterns📂 to exclude (default is none).', default=[], nargs='+')
    parser.add_argument('-f', '--format', type=str, help=f'The format string🧶 to print each accusation (default is "{DEFAULT_FORMAT}")', default=DEFAULT_FORMAT)
    parser.add_argument('-o', '--output', type=str, help='The output file📝 to write the accusations to.')
    # `--info` is a shorthand for `--verbose 1`; both options share one destination.
    parser.add_argument('-i', '--info', action='store_const', const=1, dest='verbose', help='Print info messagesℹ️ (default is disabled).', default=0)
    parser.add_argument('-v', '--verbose', type=int, choices=range(1, 4), help='Level of verbose output📢 (default is 0).', default=0)
    parser.add_argument('-ws', '--keep-whitespace', action="store_true", help='Keep whitespace in blame output📜 (default is false).', default=False)
    parser.add_argument('-s', '--silence-warnings', action='store_true', help='Silence warnings🔇 (default is false).', default=False)
    return parser


def _resolve_start_date(args: argparse.Namespace) -> datetime:
    '''
    Computes the beginning of the search window from the parsed arguments.

    An explicit `--since` date is used as the base when given, otherwise the
    current time is. The base is then shifted back by the requested number of
    weeks, days, and minutes. Without any of those options the window starts
    at the earliest representable date.
    '''
    if not (args.start_date or args.days_ago or args.weeks_ago or args.minutes_ago):
        return datetime.min

    base = parse_date(args.start_date) if args.start_date else datetime.now()
    # `--minutes-ago` has to take part in the shift as well, otherwise the
    # option is accepted but silently does nothing.
    return base - timedelta(days=args.days_ago, weeks=args.weeks_ago, minutes=args.minutes_ago)


def _write_accusations(output_file, accusations_by_file, args: argparse.Namespace, start_date: datetime, end_date: datetime):
    '''
    Writes every accusation that passes the author, whitespace, and date filters to `output_file`.
    '''
    for file_path, file_lines in accusations_by_file.items():
        for author, date, content in file_lines:
            # Only filter by author when the user named at least one.
            if args.author and author.lower() not in args.author:
                if args.verbose >= 3:
                    info(f'ignoring accusation by {author} in {file_path}')
                continue

            # Blank lines are dropped unless the user asked to keep them.
            if not args.keep_whitespace and content.strip() == '':
                if args.verbose >= 3:
                    info(f'ignoring whitespace accusation by {author} in {file_path}')
                continue

            if not start_date <= date <= end_date:
                continue

            try:
                formatted_line = args.format.format(
                    name=basename(file_path),
                    path=file_path,
                    author=author,
                    content=content,
                    date=f'{date.month:02d}/{date.day:02d}/{date.year}',
                    time=f'{date.hour:02d}:{date.minute:02d}')
            except (KeyError, IndexError, ValueError) as exc:
                # The format string comes from the user; report a mistake in
                # it as a usage error instead of a traceback.
                error(f'invalid format string {args.format!r} ({exc!r}).')

            output_file.write(formatted_line + '\n')


def _print_author_statistics(accusations, start_date: datetime):
    '''
    Prints the per-author statistics of the given accusations as info messages.
    '''
    authors_stats = analyze_authors(accusations)
    total_non_blank = sum(stats['non-blank'] for stats in authors_stats.values())
    for author, stats in authors_stats.items():
        info(f'{author}:')
        info(f' {stats["chars"]} characters')
        info(f' {stats["lines"]} lines')
        info(f' {stats["non-blank-lines"]} non-whitespace lines')
        info(f' {stats["two-or-more-char-lines"]} two-or-more-char lines')
        info(f' {stats["non-blank"]} non-whitespace characters')
        # When every line in range is whitespace the total is zero; report
        # 0% rather than dividing by zero.
        share = stats["non-blank"] / float(total_non_blank) * 100 if total_non_blank else 0.0
        info(f' Composes {share:2.0f}% of changes since {start_date.month:02d}/{start_date.day:02d}/{start_date.year}')


def main():
    '''
    Main function.
    Parses command line arguments, collects the accusations, and prints them
    to the output file (or to stdout when no output file is given).
    At any verbosity level above zero, per-author statistics are printed too.
    '''
    ##################################################
    # Parse and validate arguments
    ##################################################
    args = _build_argument_parser().parse_args()
    # Author names are compared case-insensitively
    args.author = [author.lower() for author in args.author]

    if args.verbose >= 3:
        info("Calculating start date...")
    start_date = _resolve_start_date(args)

    if args.verbose >= 3:
        info("Parsing end date...")
    end_date = parse_date(args.end_date)

    if end_date < start_date:
        error('end date must be after start date.')

    ##################################################
    # Begin accusing
    ##################################################
    if args.verbose >= 3:
        info("Collecting accusations...")
    files = get_files(args.repo, args.include, args.exclude)

    if args.verbose >= 1:
        info("searching for blame in:")
        for i, file in enumerate(files):
            info(f" {i+1}. {file}")

    accusations_by_file = accuse_files(args.repo, files, args.verbose, args.silence_warnings)

    if args.verbose >= 3:
        info(f"writing accusations for {len(accusations_by_file)} files...")

    ##################################################
    # Write the accusations to the output file
    ##################################################
    output_file = open(args.output, 'w') if args.output else stdout
    try:
        _write_accusations(output_file, accusations_by_file, args, start_date, end_date)
    finally:
        # Close a file we opened ourselves even if writing failed; never close stdout.
        if args.output:
            output_file.close()

    ##################################################
    # Print statistics
    ##################################################
    all_accusations = [accusation for file_accusations in accusations_by_file.values() for accusation in file_accusations]
    accusations_within_timeframe = [a for a in all_accusations if start_date <= a[1] <= end_date]

    if args.verbose:
        _print_author_statistics(accusations_within_timeframe, start_date)

    if not all_accusations and not args.silence_warnings:
        warn('No accusations found. Your filters might have excluded all the available files. There also might not be any commits within the specified date range, or at all. Try using the --verbose flag to see more information. Additionally, you might try adjusting the date range using the --since and --until flags.')

    ##################################################
    # End
    ##################################################
    if args.verbose and args.output:
        info(f'Accusations written to {args.output}.')
##################################################
# Helper functions
##################################################
def analyze_author(author: str, accusations: list[tuple[str, datetime, str]]) -> dict[str, int]:
    '''
    Analyzes a list of accusations and returns the statistics of a single author.

    Parameters
    ----------
    author : str
        The author to filter the accusations by (compared case-insensitively).
    accusations : list[tuple[str, datetime, str]]
        The list of accusations to analyze.
        Each accusation is a tuple of the author, date, and the line content committed.

    Returns
    -------
    dict[str, int]
        A dictionary of statistics for the author, with the same keys that
        `analyze_authors` produces. Every statistic is 0 when the author has
        no accusations.
    '''
    # Convert the author name to lowercase
    author = author.lower()
    # Keep the whole accusation tuple of every matching line: `analyze_authors`
    # unpacks (author, date, content), so passing only the names would fail.
    accusations_by_author = [accusation for accusation in accusations if accusation[0].lower() == author]
    # Analyze the accusations
    stats = analyze_authors(accusations_by_author)
    # Return the statistics for the author
    if stats.get(author) is not None:
        return stats[author]
    # If the author has no accusations, return 0 for all statistics
    return {'lines': 0, 'chars': 0, 'non-blank': 0, 'two-or-more-char-lines': 0, 'non-blank-lines': 0, 'avg-line-len': 0}
def analyze_authors(accusations: list[tuple[str, datetime, str]]) -> dict[str, dict[str, int]]:
    '''
    Computes line and character statistics for every author in a list of accusations.

    Parameters
    ----------
    accusations : list[tuple[str, datetime, str]]
        The accusations to tally.
        Each accusation is a tuple of the author, date, and the line content committed.

    Returns
    -------
    dict[str, dict[str, int]]
        Maps each lowercased author name to that author's statistics:
        'lines', 'chars', 'two-or-more-char-lines', 'non-blank' (characters
        that are neither a space nor a tab), 'non-blank-lines', and
        'avg-line-len' (characters per line).
    '''
    per_author = {}

    for raw_name, _, text in accusations:
        key = raw_name.lower()
        # Start a fresh tally the first time an author shows up
        if key not in per_author:
            per_author[key] = {'lines': 0, 'chars': 0, 'two-or-more-char-lines': 0, 'non-blank': 0, 'non-blank-lines': 0, 'avg-line-len': 0}
        tally = per_author[key]

        trimmed = text.strip()
        tally['lines'] += 1
        if trimmed:
            # The line holds something other than whitespace
            tally['non-blank-lines'] += 1
        if len(trimmed) >= 2:
            # The line holds at least two characters once trimmed
            tally['two-or-more-char-lines'] += 1
        tally['chars'] += len(text)
        # Characters left after discarding spaces and tabs
        tally['non-blank'] += len(text) - text.count(' ') - text.count('\t')

    # Every tally has at least one line, so the division is always defined
    for tally in per_author.values():
        tally['avg-line-len'] = tally['chars'] / tally['lines']

    return per_author
def parse_date(date: str) -> datetime:
    '''
    Parses a date string into a datetime object.
    The date can be formatted either in ISO 8601 format or American date format `m/d/y`.

    Parameters
    ----------
    date : str
        The date string to parse.

    Returns
    -------
    datetime
        The parsed date as a naive datetime. A date that carries a UTC offset
        is converted to the local timezone first, so that it can be compared
        with the naive local dates used elsewhere in this program.

    If the string matches neither format, an error is reported through
    `error`, which terminates the program.
    '''
    try:
        # Try to parse the date as an ISO 8601 date
        parsed = datetime.fromisoformat(date)
    except ValueError:
        # Try to parse the date as an American date `m/d/y`
        try:
            parsed = datetime.strptime(date, '%m/%d/%Y')
        except ValueError:
            # If the date cannot be parsed, report it; `error` does not return
            error('date must be in ISO 8601 format or American date format `m/d/y`.')

    if parsed.tzinfo is not None:
        # Comparing an aware datetime with a naive one raises TypeError, so
        # express the instant in local time and drop the offset.
        parsed = parsed.astimezone().replace(tzinfo=None)
    return parsed
def get_files(directory = "./", include = ('**/*',), exclude = ()) -> list[str]:
    '''
    Gets a list of file paths from a list of file patterns, excluding files matching other file patterns.

    Parameters
    ----------
    directory : str, optional
        The directory to search (default is the current directory)
    include : list[str], optional
        The file patterns to include (default is "**/*")
    exclude : list[str], optional
        The file patterns to exclude (default is none)

    Returns
    -------
    list[str]
        The sorted absolute paths of the regular files that match an include
        pattern and no exclude pattern. A pattern that names a directory
        covers everything below that directory. Directories themselves are
        not returned, since they cannot be blamed.
    '''
    from os.path import isfile  # Local import: only this function needs it

    # Escape the directory so glob metacharacters in its name (such as `[`)
    # are matched literally; only the user's patterns are meant to be globs.
    root = glob.escape(abspath(directory))

    def expand(patterns) -> set:
        '''Returns every path matched by the patterns, or lying below a match.'''
        matches = set()
        for file_pattern in patterns:
            matches.update(glob.glob(root + "/" + file_pattern, recursive=True))
            # Also cover the contents of any directory the pattern matched
            matches.update(glob.glob(root + "/" + file_pattern + "/**/*", recursive=True))
        return matches

    candidates = expand(include) - expand(exclude)
    # Sort so the result does not depend on set iteration order
    return sorted(path for path in candidates if isfile(path))
# First line of every `git blame --line-porcelain` entry: the commit hash
# (40 hex digits for SHA-1, 64 for SHA-256 repositories), the line number in
# the original file, the line number in the final file, and optionally the
# size of the line group. Anchored to the start of a line: source lines are
# always emitted with a leading tab, so they can never be taken for a header.
_BLAME_ENTRY_HEADER = re.compile(r'^(?:[0-9a-fA-F]{40}|[0-9a-fA-F]{64}) \d+ \d+.*$', re.MULTILINE)
# Header fields of interest inside one entry.
_BLAME_AUTHOR = re.compile(r'^author (.*)$', re.MULTILINE)
_BLAME_AUTHOR_TIME = re.compile(r'^author-time (\d+)$', re.MULTILINE)


def extract_accusations_from_line_porcelain_output(line_porcelain_output: str) -> list[tuple[str, datetime, str]]:
    '''
    Extracts the author, date, and the line content committed from each entry of the `git blame --line-porcelain` output using regex.
    The format of the `git blame --line-porcelain` output can be found here:
    https://git-scm.com/docs/git-blame#_the_porcelain_format

    Parameters
    ----------
    line_porcelain_output : str
        The output of `git blame --line-porcelain`.

    Returns
    -------
    list[tuple[str, datetime, str]]
        A list of tuples containing the lowercased author, the author date
        (as a naive local datetime), and the committed content for each line.
        Entries that lack an author or an author time are skipped.
    '''
    # The list of accusations to return
    accusations = []
    # Split the output at the header line of each entry. The text before the
    # first header is not an entry, so it is dropped.
    sections = _BLAME_ENTRY_HEADER.split(line_porcelain_output)[1:]
    for section in sections:
        author_match = _BLAME_AUTHOR.search(section)
        time_match = _BLAME_AUTHOR_TIME.search(section)
        lines = section.split('\n')
        if author_match is None or time_match is None or len(lines) < 2:
            # Not a well-formed entry; skip it rather than crash
            continue
        date = datetime.fromtimestamp(int(time_match.group(1)))
        # The source line is the last line of the entry, prefixed with a tab.
        # The entry ends with a newline, hence the second to last element.
        content = lines[-2].removeprefix('\t')
        # Add the accusation to the list
        accusations.append((author_match.group(1).lower(), date, content))
    # Return the list of accusations
    return accusations
def accuse_files(directory: str, files: list[str], verbose: int=0, warnings_disabled=False) -> dict[str, list[tuple[str, datetime, str]]]:
    '''
    Runs `git blame` on each file and collects who committed every line, and when.

    Parameters
    ----------
    directory : str
        The directory in which `git blame` is run.
    files : list[str]
        The files to blame.
    verbose : int, optional
        The verbosity level (default is 0). Level 2 reports each file being
        checked and each failed `git blame`; level 3 adds debug messages.
    warnings_disabled : bool, optional
        Disable warnings (default is false).

    Returns
    -------
    dict[str, list[tuple[str, datetime, str]]]
        A dictionary of accusations.
        The key is the file path, and the value is a list of accusations.
        Each accusation is a tuple of the author, date, and content.
        A file that `git blame` produces no output for maps to an empty list.
    '''
    directory = abspath(directory)
    all_accusations = {}
    for file in files:
        if verbose >= 2:
            # Print file information if verbose level is 2 or higher
            info(f'Checking {file}...')

        # Capture the raw bytes so git runs once per file and the encoding
        # can be chosen afterwards.
        completed = subprocess.run(['git', 'blame', '--line-porcelain', file], capture_output=True, cwd=directory)
        if completed.returncode != 0 and verbose >= 2:
            reason = completed.stderr.decode('utf-8', errors='replace').strip()
            info(f'git blame failed for {file}: {reason}')

        try:
            # First attempt UTF-8 to retain unicode characters
            line_porcelain_output = completed.stdout.decode('utf-8')
        except UnicodeDecodeError:
            # If the output is not UTF-8, use latin1, which accepts any byte
            if not warnings_disabled:
                warn(f'File {file} is not UTF-8. Falling back to latin1 encoding. (You might not want to include this file in your search.)')
            line_porcelain_output = completed.stdout.decode('latin1')
        else:
            if verbose >= 3:
                info("Valid UTF-8 file.")

        # Normalize line endings the same way text-mode pipes do
        line_porcelain_output = line_porcelain_output.replace('\r\n', '\n').replace('\r', '\n')

        if verbose >= 3:
            # Print debug information if verbose level is 3 or higher
            info("Extracting accusations from line porcelain output...")
        # Extract author, date, and content from each line, then file the
        # result under this file's path
        all_accusations[file] = extract_accusations_from_line_porcelain_output(line_porcelain_output)
    # Return the accusations of all files
    return all_accusations
def info(*messages: str):
    '''
    Prints an info message to stderr, prefixed with the program name.
    The stream is flushed right away.

    Parameters
    ----------
    messages : str
        The info messages to print, separated by spaces.
    '''
    print(f'{PROGRAM_NAME}: info:', *messages, file=stderr, flush=True)
def warn(*messages: str):
    '''
    Prints a warning message to stderr, prefixed with the program name.
    The message is wrapped in ANSI escape codes that color it yellow, and the
    stream is flushed right away.

    Parameters
    ----------
    messages : str
        The warning messages to print, separated by spaces.
    '''
    print(f'{PROGRAM_NAME}: warning:\x1b[33m', *messages, '\x1b[0m', file=stderr, flush=True)
def error(*messages: str):
    '''
    Prints an error message to stderr, followed by a pointer to `--help`,
    then terminates the program with exit status 1. This function does not
    return.

    Parameters
    ----------
    messages : str
        The error messages to print, separated by spaces.
    '''
    print(f'{PROGRAM_NAME}: error:', *messages, 'see `--help` for more information.', file=stderr)
    # Raise SystemExit directly: the `exit` builtin is injected by the `site`
    # module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
##################################################
# Main execution
##################################################
# Only start the program when this file is executed directly; importing it
# as a module merely defines the functions above.
if __name__ == "__main__":
    main()