-
Notifications
You must be signed in to change notification settings - Fork 1
/
getstills.py
231 lines (174 loc) · 7.76 KB
/
getstills.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
"""
getstills.py
# Overview
Creates periodic stills from a video file, with image filenames including
millisecond timestamps.
# Usage
$ python getstills.py [options] <filename>
Run with -h to list all options. The period is set with -p/--period and is
expressed in milliseconds.
If no period is specified, the default is 1000 ms.
To extract every frame as a still, specify a period of 0 ms.
# Output
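When run with the -k/--ksl option, the application creates a directory based
on the filename of the video file (with an images/ subdirectory) and adds each
still there. Without that option, stills are written to the current directory.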
Image filenames include the base filename (without the extension) of the
media file, the timestamp (expressed in milliseconds), and the total length
of the video (in milliseconds).
Caveat: Calculation of timestamps assumes that the video has constant
framerate.
"""
# %%
import sys
import os
import av
import json
import argparse
# %%
def extract(video_path, period=1000, first_time=0, last_time=-1, max_stills=-1, hard_break=False, filetype_ext="jpg", prep_ksl=False):
    """Extract periodic stills from a video and optionally index them.

    Decodes the first video stream of the file and saves the first frame found
    at or after each target time. Image filenames have the form
    ``<name>_<length>_<timestamp>.<ext>``, where ``<name>`` is the video's
    base filename without extension and both numbers are milliseconds,
    zero-padded to eight digits.

    Caveat: the total length is computed from the frame count and the average
    frame rate, which assumes a constant frame rate. Per-frame timestamps
    come from the frame's own presentation time when available.

    Args:
        video_path: Path to the video file.
        period: Interval between stills in ms. A value of 0 saves every frame.
        first_time: Time in ms at which extraction begins.
        last_time: Time in ms after which no stills are saved (-1: no limit).
        max_stills: Maximum number of stills to save (-1: no limit).
        hard_break: If true, stop decoding as soon as a limit is reached
            instead of reading (and counting) the remaining frames.
        filetype_ext: Filename extension selecting the image format.
        prep_ksl: If true, create ``./stills_<name>/images/`` for the stills
            and write ``img_arr_init.js`` (a Javascript array of the image
            filenames) in ``./stills_<name>/``.

    Raises:
        Exception: If the file contains no video stream.
    """
    # Derive output names from the filename of the media
    vfilename = os.path.basename(video_path)
    fname = os.path.splitext(vfilename)[0]
    basename = "stills_" + fname

    # If this is for a KSL project, create appropriate directories
    if prep_ksl:
        proj_dir = "./" + basename + "/"
        stills_dir = proj_dir + "images/"

        if not os.path.exists(proj_dir):
            print("Creating directory:", proj_dir)
            os.mkdir(proj_dir)
        else:
            print("Warning: Project directory exists. Existing data may be overwritten.")

        if not os.path.exists(stills_dir):
            print("Creating directory:", stills_dir)
            os.mkdir(stills_dir)
        else:
            print("Warning: Stills directory exists. Existing stills may be overwritten.")
    else:
        proj_dir = "./"
        stills_dir = proj_dir

    # Print explanatory messages.
    print("Using video from", video_path)
    print("Starting at", first_time, "ms")
    if last_time != -1:
        print("Will stop at", last_time, "ms")
    if max_stills != -1:
        print("Will stop after extracting", max_stills, "stills")
    print("Extracting stills every", period, "ms ...")

    # Initialize counters for iteration
    image_list = []
    stills_count = 0
    fcount = 0
    next_target_time = first_time

    container = av.open(video_path)
    try:
        # find the first video stream
        video_stream = next((s for s in container.streams if s.type == 'video'), None)
        if video_stream is None:
            raise Exception("No video stream found in {}".format(vfilename))

        # get technical stats on the video stream; assumes FPS is constant.
        # The average rate may be unavailable, so guard against None/zero.
        rate = video_stream.average_rate
        fps = (rate.numerator / rate.denominator) if rate else 0

        # calculate duration in ms; fall back to the stream's own duration
        # (expressed in time_base units) when the frame count or rate is
        # not known, and to 0 when nothing is known
        if fps and video_stream.frames:
            length = int((video_stream.frames / fps) * 1000)
        elif video_stream.duration is not None and video_stream.time_base is not None:
            length = int(video_stream.duration * video_stream.time_base * 1000)
        else:
            length = 0

        # loop through every frame in the video stream, starting at the beginning
        for frame in container.decode(video_stream):

            if frame.time is not None:
                # Probably more accurate with variable FPS
                ftime = int(frame.time * 1000)
            elif fps:
                # Frame has no timestamp: estimate it from the frame index,
                # which assumes constant FPS
                ftime = int((fcount / fps) * 1000)
            else:
                # No way to place this frame in time; count it and move on
                fcount += 1
                continue

            # break the loop if we've exceeded the limits
            if (hard_break and
                    ((max_stills > -1 and stills_count >= max_stills) or
                     (last_time > -1 and ftime > last_time))):
                break

            # Grab the first still after the target time index
            # (assuming the limits have not been exceeded.)
            if ((max_stills == -1 or stills_count < max_stills) and
                    (last_time == -1 or ftime <= last_time) and
                    (ftime >= next_target_time)):
                ifilename = f'{fname}_{length:08}_{ftime:08}' + "." + filetype_ext
                ipathname = stills_dir + ifilename
                frame.to_image().save(ipathname)
                image_list.append(ifilename)

                if period > 0:
                    # Advance to the first target strictly after this frame.
                    # Normally that is a single period; after a gap (or a
                    # stream whose timestamps start late) it skips the
                    # missed targets instead of saving a burst of
                    # consecutive frames to catch up.
                    missed = (ftime - next_target_time) // period
                    next_target_time += period * (missed + 1)
                else:
                    next_target_time += period
                stills_count += 1

            fcount += 1
    finally:
        # Release the file even if decoding or saving failed
        container.close()

    print("Extracted", stills_count, "stills out of", fcount, "video frames.")

    # If required, create image index array file
    if prep_ksl:
        print("Creating stills index...")

        # Serialize one row per image. Rows are joined by hand rather than
        # by post-processing one big JSON string, so that brackets inside a
        # filename cannot be mistaken for row boundaries.
        rows = [json.dumps([iname, False, "", "", False, "", ""])
                for iname in image_list]
        if rows:
            image_array_j = "[\n" + ", \n".join(rows) + "\n]"
        else:
            image_array_j = "[]"

        # add bits around the JSON text to make it valid Javascript
        image_array_j = "imgArray=\n" + image_array_j + "\n;"

        # write Javascript file in the project directory
        array_pathname = proj_dir + "img_arr_init.js"
        with open(array_pathname, "w") as array_file:
            array_file.write(image_array_j)

        print("Stills index created at " + array_pathname + ".")

    print("Done.")
# %%
def main():
    """Parse command-line arguments, validate them, and run the extraction.

    Exits with status 1 (after printing a message to stderr) when the video
    path does not exist or is a directory, or when the period is outside the
    accepted range (-1, or 0 to 86400000 ms). Otherwise passes the parsed
    options to extract().
    """
    app_desc = """getstills.py
    Extracts still images from a video file.
    Note: All times are expressed in milliseconds.
    """
    parser = argparse.ArgumentParser(
        prog='python getstills.py',
        description=app_desc,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("video_path", metavar="FILE",
        help="Path and filename for the video file")
    parser.add_argument("-p", "--period", type=int, default=1000,
        help="Extract stills every PERIOD ms. (To extract every frame, use a value of 0.)")
    parser.add_argument("-s", "--start", type=int, default=0,
        help="Begin extracting at START ms in the video.")
    parser.add_argument("-e", "--end", type=int, default=-1,
        help="Stop extracting at END ms in video. (Use value of -1 to go to the end of the media.)")
    parser.add_argument("-m", "--max", type=int, default=-1,
        help="Stop extracting after MAX stills have been saved. Use value of -1 for unlimited.")
    parser.add_argument("-b", "--hard_break", action="store_true",
        help="Break (instead of looping through all frames) when END time or MAX stills is reached.")
    parser.add_argument("-t", "--type", default="jpg", choices=["jpg", "png"],
        help="Filename extension for desired output image file type.")
    parser.add_argument("-k", "--ksl", action="store_true",
        help="Create directories and a KeystrokeLabeler index of extracted stills")

    args = parser.parse_args()

    # A directory exists as a path but can never be opened as a video, so
    # reject it here with the same message as a missing file.
    if not os.path.exists(args.video_path) or os.path.isdir(args.video_path):
        print("Error: Invalid file path.", file=sys.stderr)
        print("Run with '-h' for help.", file=sys.stderr)
        sys.exit(1)

    # Accept -1, or anything from 0 up to 24 hours expressed in ms
    if not (args.period == -1 or 0 <= args.period <= 86400000):
        print("Error: Please enter a sensible value for the period in milliseconds.",
              file=sys.stderr)
        sys.exit(1)

    extract(args.video_path, args.period, args.start, args.end,
            args.max, args.hard_break, args.type, args.ksl)
# %%
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()