#!/usr/bin/env python
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import math
import cv2
from imutils.video import FileVideoStream, WebcamVideoStream
import numpy as np
import PIL.Image
import os
import time
from tqdm import tqdm
from src.models import piv_liteflownet, hui_liteflownet
from src.utils_data import image_files_from_folder
from src.utils_plot import quiver_plot, write_flow, flowname_modifier
from src.datasets import InferenceRun
# INPUT
args_model = './models/pretrain_torch/Hui-LiteFlowNet.paramOnly'
args_img1 = './images/first.png'
args_img2 = './images/second.png'
args_output = './out.flo'
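# NOTE: args_model above is only a fallback default; the __main__ block below
# overrides it, while args_img1/args_img2/args_output feed the manual demo at
# the bottom of this file.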
##########################################################
def estimate(net: torch.nn.Module, img1: torch.Tensor, img2: torch.Tensor, tensor: bool = False):
    """Estimate the optical flow between two image tensors of shape (B, C, H, W).

    The inputs are resized to the nearest multiple of 32 (required by the
    network's pyramid levels); the resulting flow is interpolated back to the
    original resolution and its vectors rescaled accordingly.
    """
    # Both input images must have the same spatial dimensions!
    assert img1.size(2) == img2.size(2)
    assert img1.size(3) == img2.size(3)

    input_width = img1.size(3)
    input_height = img1.size(2)

    # Round the dimensions up to the nearest multiple of 32
    adaptive_width = int(math.ceil(input_width / 32.0) * 32.0)
    adaptive_height = int(math.ceil(input_height / 32.0) * 32.0)

    # Scale factors to map the flow vectors back to the input resolution
    scale_width = float(input_width) / float(adaptive_width)
    scale_height = float(input_height) / float(adaptive_height)

    tensor_im1 = torch.nn.functional.interpolate(input=img1, size=(adaptive_height, adaptive_width),
                                                 mode='bilinear', align_corners=False)
    tensor_im2 = torch.nn.functional.interpolate(input=img2, size=(adaptive_height, adaptive_width),
                                                 mode='bilinear', align_corners=False)

    # Disable gradient tracking for inference performance
    with torch.no_grad():
        net.eval()
        tensor_raw_output = net(tensor_im1, tensor_im2)

    # Interpolate the flow result back to the input size
    tensor_flow = torch.nn.functional.interpolate(input=tensor_raw_output, size=(input_height, input_width),
                                                  mode='bilinear', align_corners=False)
    tensor_flow[:, 0, :, :] *= scale_width
    tensor_flow[:, 1, :, :] *= scale_height

    if tensor:
        return tensor_flow.detach()
    else:
        # Return an (H, W, 2) numpy array
        output_flow = torch.squeeze(tensor_flow).permute(1, 2, 0).detach().cpu().numpy()
        return output_flow
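
# Example usage of estimate() (a minimal sketch; the image paths are
# hypothetical, and the images are assumed to share the same size):
#
#   im1 = transforms.ToTensor()(PIL.Image.open('frame_0.png').convert('RGB')).unsqueeze(0)
#   im2 = transforms.ToTensor()(PIL.Image.open('frame_1.png').convert('RGB')).unsqueeze(0)
#   flow = estimate(net, im1, im2)  # (H, W, 2) numpy array: u = flow[..., 0], v = flow[..., 1]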
class Inference:
    def __init__(self, net, netname=None, output_dir='./outputs', device='cpu'):
        if netname is None:
            self.netname = 'test'
        else:
            self.netname = os.path.splitext(os.path.basename(netname))[0]

        self.default = os.path.join(output_dir, self.netname)
        # Fall back to CPU when CUDA is unavailable
        self.device = device if torch.cuda.is_available() else 'cpu'
        self.net = net
    def video_parsing(self, vidfile=0, write: bool = True) -> None:
        # Define the video source (offline file or live stream)
        if isinstance(vidfile, str):
            if not os.path.isfile(vidfile):
                raise ValueError(f'Input video file NOT found at {vidfile}!')
            window_name = os.path.splitext(os.path.basename(vidfile))[0]
            cap = FileVideoStream(vidfile).start()
        else:
            window_name = 'piv_stream'
            cap = WebcamVideoStream(vidfile).start()
        time.sleep(2.0)  # warm up the input stream

        # Create the output directory
        outdir = os.path.join(self.default, f'vid_{window_name}')
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

        count = 0
        prev_frame = None
        while True:
            curr_frame = cap.read()
            if curr_frame is None:  # end of the video
                break

            if prev_frame is not None:
                count += 1
                out_flow = self.parser(self.net, prev_frame, curr_frame, device=self.device)

                # Post-processing here!
                out_name = window_name + '_%06d_out.flo' % count
                out_name = os.path.join(outdir, out_name)
                if write:
                    write_flow(out_flow, out_name)
                    tqdm.write(f'Writing {out_name}')

            prev_frame = curr_frame

            # Display the input video
            cv2.imshow(window_name, curr_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cv2.destroyAllWindows()
        cap.stop()
        print(f'Finished processing all frames from the {window_name} video stream!')
        print(f'A total of {count} frame pairs were processed.')
    def images_parsing(self, imgdir: str, pair: bool = True, write: bool = True) -> None:
        if isinstance(imgdir, str):  # single directory
            if not os.path.isdir(imgdir):
                raise ValueError(f'Input directory NOT found at {imgdir}!')

            # Create the output directory
            basedir = os.path.basename(imgdir) + '_parse'
            outdir = os.path.join(self.default, basedir)
            if not os.path.isdir(outdir):
                os.makedirs(outdir)

            im_files = image_files_from_folder(imgdir, pair=pair)

            if pair:  # filenames form image pairs (e.g., *_img1 / *_img2)
                for file1 in tqdm(im_files, ncols=100, leave=True, unit='pair', desc=f'Evaluating {imgdir}'):
                    fbase, fext = os.path.splitext(file1)
                    file2 = fbase.rsplit('_', 1)[0] + '_img2' + fext

                    if os.path.isfile(file2):
                        out_flow = self.parser(self.net,
                                               PIL.Image.open(file1).convert('RGB'),
                                               PIL.Image.open(file2).convert('RGB'),
                                               device=self.device)
                        # Post-processing here
                        out_name = flowname_modifier(file1, outdir, pair=pair)
                        if write:
                            write_flow(out_flow, out_name)

            else:  # filenames form a sequence of consecutive frames
                prev_frame = None
                for curr_frame in tqdm(im_files, ncols=100, leave=True, unit='pair', desc=f'Evaluating {imgdir}'):
                    if prev_frame is not None:
                        out_flow = self.parser(self.net,
                                               PIL.Image.open(prev_frame).convert('RGB'),
                                               PIL.Image.open(curr_frame).convert('RGB'),
                                               device=self.device)
                        # Post-processing here
                        out_name = flowname_modifier(prev_frame, outdir, pair=pair)
                        if write:
                            write_flow(out_flow, out_name)

                    prev_frame = curr_frame

            tqdm.write(f'Finished processing all images from the {imgdir} path!')

        else:
            raise ValueError('Unknown input! Input must be a directory path')
    def dataloader_parsing(self, dir: str, pair: bool = True, write: bool = True) -> None:
        if not os.path.isdir(dir):
            raise ValueError(f'Input directory NOT found at {dir}!')

        # Create the output directory
        basedir = os.path.basename(dir) + '_loader'
        outdir = os.path.join(self.default, basedir)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

        test_dataset = InferenceRun(root=dir, pair=pair)
        test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)

        for images, img_name in tqdm(test_dataloader, ncols=100, leave=True, unit='pair', desc=f'Evaluating {dir}'):
            # Move the image pair to the target device
            images[0], images[1] = images[0].to(self.device), images[1].to(self.device)
            out_flow = estimate(self.net, images[0], images[1], tensor=False)

            # Post-processing here
            out_name = flowname_modifier(img_name[0], outdir, pair=pair)
            if write:
                write_flow(out_flow, out_name)

        tqdm.write(f'Done processing {len(test_dataloader)} pairs')
    @staticmethod
    def parser(net, im1, im2, device='cpu'):
        assert im1.size == im2.size

        tensor_im1 = transforms.ToTensor()(im1).to(device)
        tensor_im2 = transforms.ToTensor()(im2).to(device)

        C, H, W = tensor_im1.size()
        # Add the batch dimension: (C, H, W) -> (1, C, H, W)
        tensor_im1 = tensor_im1.view(1, C, H, W)
        tensor_im2 = tensor_im2.view(1, C, H, W)

        out_flow = estimate(net, tensor_im1, tensor_im2)
        return out_flow
##########################################################
if __name__ == '__main__':
    tic = time.time()
    root_model = './models/pretrain_torch'
    root_input = './images'

    # INPUTS
    args_model = os.path.join(root_model, 'PIV-LiteFlowNet-en.paramOnly')
    args_vid = os.path.join(root_input, 'stepen_exp_rot32.gif')
    args_imdir_pair = os.path.join(root_input, 'pair_cai_SQG')
    args_imdir_seq = os.path.join(root_input, 'seq_TA_sbr')
    # args_imdir_seq = os.path.join(root_input, 'seq_hiroki_imf108g30-1-25')  # 1024 x 1024 takes too much memory!
    # args_img1 = os.path.join(root_input, 'DNS_turbulence_img1.tif')
    # args_img2 = os.path.join(root_input, 'DNS_turbulence_img2.tif')

    # Set up the model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if os.path.isfile(args_model):
        weights = torch.load(args_model)
    else:
        raise ValueError(f'Model parameters file NOT found at {args_model}!')

    # net = hui_liteflownet(args_model, device=device)
    net = piv_liteflownet(weights).to(device)
    infer = Inference(net, netname=args_model, device=device)

    # infer.images_parsing(args_imdir_seq, pair=False, write=True)
    # infer.images_parsing(args_imdir_pair, pair=True, write=True)
    # infer.dataloader_parsing(args_imdir_seq, pair=False, write=True)
    infer.dataloader_parsing(args_imdir_pair, pair=True, write=True)
    # infer.video_parsing(vidfile=0, write=False)

    dur = time.time() - tic
    tqdm.write(f'Finished processing in {dur:.2f} s!')
    # Display the results (for the manual parser)
    out_name = os.path.join(os.path.dirname(args_img1), 'test_piv.flo')
    out_name_q = os.path.join(os.path.dirname(args_img1), 'test_piv.png')
    out_flow = Inference.parser(net,
                                PIL.Image.open(args_img1).convert('RGB'),
                                PIL.Image.open(args_img2).convert('RGB'),
                                device=device)
    write_flow(out_flow, out_name)
    u, v = quiver_plot(out_flow, filename=out_name_q)

    # Raw .flo output (adapted from Sniklaus' PyTorch implementation): the
    # Middlebury format starts with the magic bytes 'PIEH' (80, 73, 69, 72),
    # followed by the width and height as int32, then the (H, W, 2) flow field
    # as float32. Note that Inference.parser returns an (H, W, 2) numpy array,
    # not a tensor, so shape-based indexing is used here.
    with open(args_output, 'wb') as object_output:
        np.array([80, 73, 69, 72], np.uint8).tofile(object_output)
        np.array([out_flow.shape[1], out_flow.shape[0]], np.int32).tofile(object_output)
        out_flow.astype(np.float32).tofile(object_output)
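
    # A minimal sketch of reading the .flo file back (assuming the same layout
    # written above, with args_output = './out.flo'):
    #
    #   with open('./out.flo', 'rb') as f:
    #       assert f.read(4) == b'PIEH'
    #       w, h = np.fromfile(f, np.int32, count=2)
    #       flow = np.fromfile(f, np.float32).reshape(h, w, 2)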