From a0325b92730a334089152524c4e9ed553718458b Mon Sep 17 00:00:00 2001 From: Francesco Visin Date: Thu, 26 Oct 2017 20:53:38 +0200 Subject: [PATCH 1/6] Refactor data_augm to modify the dict in place * Do not create pointer seq_x, seq_y. It is easy to introduce bugs when operations on them are not reflected in the original dictionary. * Pass the dataset object rather than all its parameters. * NOTE: This commit breaks the optical flow. Will be fixed in the next commit. --- dataset_loaders/data_augmentation.py | 141 +++++++++++++++------------ dataset_loaders/parallel_loader.py | 96 +++++++++--------- 2 files changed, 127 insertions(+), 110 deletions(-) diff --git a/dataset_loaders/data_augmentation.py b/dataset_loaders/data_augmentation.py index a9fd527..574b4a6 100644 --- a/dataset_loaders/data_augmentation.py +++ b/dataset_loaders/data_augmentation.py @@ -328,7 +328,9 @@ def apply_warp(x, warp_field, fill_mode='reflect', return x -def random_transform(x, y=None, +def random_transform(dataset, + seq, + prefix_and_fnames=None, rotation_range=0., width_shift_range=0., height_shift_range=0., @@ -360,10 +362,14 @@ def random_transform(x, y=None, Parameters ---------- - x: array of floats - An image. - y: array of int - An array with labels. + dataset: a :class:`Dataset` instance + The instance of the current dataset. First step towards making + this a class method. + seq: a dictionary of numpy array + A dictionary with at least these keys: 'data', 'labels', 'filenames', + 'subset'. + prefix_and_fnames: list + A list of prefix and names for the current sequence rotation_range: int Degrees of rotation (0 to 180). 
width_shift_range: float @@ -436,15 +442,19 @@ def random_transform(x, y=None, ''' # Set this to a dir, if you want to save augmented images samples save_to_dir = None + nclasses = dataset.nclasses + void_label = dataset.void_labels if rescale: raise NotImplementedError() - # Do not modify the original images - x = x.copy() - if y is not None and len(y) > 0: - y = y[..., None] # Add extra dim to y to simplify computation - y = y.copy() + # Make sure we do not modify the original images + seq['data'] = seq['data'].copy() + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = seq['labels'].copy() + # Add extra dim to y to simplify computation + seq['labels'] = seq['labels'][..., None] + sh = seq['data'].shape # listify zoom range if np.isscalar(zoom_range): @@ -464,13 +474,13 @@ def random_transform(x, y=None, # Channel shift if channel_shift_range != 0: - x = random_channel_shift(x, channel_shift_range, rows_idx, cols_idx, - chan_idx) + seq['data'] = random_channel_shift(seq['data'], channel_shift_range, + rows_idx, cols_idx, chan_idx) # Gamma correction if gamma > 0: scale = float(1) - x = ((x / scale) ** gamma) * scale * gain + seq['data'] = ((seq['data'] / scale) ** gamma) * scale * gain # Affine transformations (zoom, rotation, shift, ..) 
if (rotation_range or height_shift_range or width_shift_range or @@ -488,12 +498,12 @@ def random_transform(x, y=None, # --> Shift/Translation if height_shift_range: tx = (np.random.uniform(-height_shift_range, height_shift_range) * - x.shape[rows_idx]) + sh[rows_idx]) else: tx = 0 if width_shift_range: ty = (np.random.uniform(-width_shift_range, width_shift_range) * - x.shape[cols_idx]) + sh[cols_idx]) else: ty = 0 translation_matrix = np.array([[1, 0, tx], @@ -520,62 +530,66 @@ def random_transform(x, y=None, transform_matrix = np.dot(np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix) - h, w = x.shape[rows_idx], x.shape[cols_idx] + h, w = sh[rows_idx], sh[cols_idx] transform_matrix = transform_matrix_offset_center(transform_matrix, h, w) # Apply all the transformations together - x = apply_transform(x, transform_matrix, fill_mode=fill_mode, - cval=cval, order=1, rows_idx=rows_idx, - cols_idx=cols_idx) - if y is not None and len(y) > 0: - y = apply_transform(y, transform_matrix, fill_mode=fill_mode, - cval=cval_mask, order=0, rows_idx=rows_idx, - cols_idx=cols_idx) + seq['data'] = apply_transform(seq['data'], transform_matrix, + fill_mode=fill_mode, cval=cval, order=1, + rows_idx=rows_idx, cols_idx=cols_idx) + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = apply_transform(seq['labels'], + transform_matrix, + fill_mode=fill_mode, + cval=cval_mask, + order=0, + rows_idx=rows_idx, + cols_idx=cols_idx) # Horizontal flip if np.random.random() < horizontal_flip: # 0 = disabled - x = flip_axis(x, cols_idx) - if y is not None and len(y) > 0: - y = flip_axis(y, cols_idx) + seq['data'] = flip_axis(seq['data'], cols_idx) + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = flip_axis(seq['labels'], cols_idx) # Vertical flip if np.random.random() < vertical_flip: # 0 = disabled - x = flip_axis(x, rows_idx) - if y is not None and len(y) > 0: - y = flip_axis(y, rows_idx) + seq['data'] = 
flip_axis(seq['data'], rows_idx) + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = flip_axis(seq['labels'], rows_idx) # Spline warp if spline_warp: import SimpleITK as sitk - warp_field = gen_warp_field(shape=(x.shape[rows_idx], - x.shape[cols_idx]), + warp_field = gen_warp_field(shape=(sh[rows_idx], + sh[cols_idx]), sigma=warp_sigma, grid_size=warp_grid_size) - x = apply_warp(x, warp_field, - interpolator=sitk.sitkLinear, - fill_mode=fill_mode, - fill_constant=cval, - rows_idx=rows_idx, cols_idx=cols_idx) - if y is not None and len(y) > 0: - y = np.round(apply_warp(y, warp_field, - interpolator=sitk.sitkNearestNeighbor, - fill_mode=fill_mode, - fill_constant=cval_mask, - rows_idx=rows_idx, cols_idx=cols_idx)) + seq['data'] = apply_warp(seq['data'], warp_field, + interpolator=sitk.sitkLinear, + fill_mode=fill_mode, fill_constant=cval, + rows_idx=rows_idx, cols_idx=cols_idx) + if seq['labels'] is not None and len(seq['labels']) > 0: + # TODO is this round right?? 
+ seq['labels'] = np.round( + apply_warp(seq['labels'], warp_field, + interpolator=sitk.sitkNearestNeighbor, + fill_mode=fill_mode, fill_constant=cval_mask, + rows_idx=rows_idx, cols_idx=cols_idx)) # Crop # Expects axes with shape (..., 0, 1) # TODO: Add center crop if crop_size: # Reshape to (..., 0, 1) - pattern = [el for el in range(x.ndim) if el != rows_idx and + pattern = [el for el in range(seq['data'].ndim) if el != rows_idx and el != cols_idx] + [rows_idx, cols_idx] - inv_pattern = [pattern.index(el) for el in range(x.ndim)] - x = x.transpose(pattern) + inv_pattern = [pattern.index(el) for el in range(seq['data'].ndim)] + seq['data'] = seq['data'].transpose(pattern) crop = list(crop_size) pad = [0, 0] - h, w = x.shape[-2:] + h, w = seq['data'].shape[-2:] # Compute amounts if crop[0] < h: @@ -594,26 +608,29 @@ def random_transform(x, y=None, left, crop[1] = 0, w # Cropping - x = x[..., top:top+crop[0], left:left+crop[1]] - if y is not None and len(y) > 0: - y = y.transpose(pattern) - y = y[..., top:top+crop[0], left:left+crop[1]] + seq['data'] = seq['data'][..., top:top+crop[0], left:left+crop[1]] + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = seq['labels'].transpose(pattern) + seq['labels'] = seq['labels'][..., top:top+crop[0], + left:left+crop[1]] # Padding if pad != [0, 0]: - pad_pattern = ((0, 0),) * (x.ndim - 2) + ( + pad_pattern = ((0, 0),) * (seq['data'].ndim - 2) + ( (pad[0]//2, pad[0] - pad[0]//2), (pad[1]//2, pad[1] - pad[1]//2)) - x = np.pad(x, pad_pattern, 'constant') - y = np.pad(y, pad_pattern, 'constant', constant_values=void_label) + seq['data'] = np.pad(seq['data'], pad_pattern, 'constant') + seq['labels'] = np.pad(seq['labels'], pad_pattern, 'constant', + constant_values=void_label) - x = x.transpose(inv_pattern) - if y is not None and len(y) > 0: - y = y.transpose(inv_pattern) + # Reshape to original shape + seq['data'] = seq['data'].transpose(inv_pattern) + if seq['labels'] is not None and len(seq['labels']) 
> 0: + seq['labels'] = seq['labels'].transpose(inv_pattern) if return_optical_flow: - flow = optical_flow(x, rows_idx, cols_idx, chan_idx, + flow = optical_flow(seq['data'], rows_idx, cols_idx, chan_idx, return_rgb=return_optical_flow == 'rgb') - x = np.concatenate((x, flow), axis=chan_idx) + seq['data'] = np.concatenate((seq['data'], flow), axis=chan_idx) # Save augmented images if save_to_dir: @@ -621,11 +638,9 @@ def random_transform(x, y=None, fname = 'data_augm_{}.png'.format(np.random.randint(1e4)) print ('Save to dir'.format(fname)) cmap = sns.hls_palette(nclasses) - save_img2(x, y, os.path.join(save_to_dir, fname), + save_img2(seq['data'], seq['labels'], os.path.join(save_to_dir, fname), cmap, void_label, rows_idx, cols_idx, chan_idx) # Undo extra dim - if y is not None and len(y) > 0: - y = y[..., 0] - - return x, y + if seq['labels'] is not None and len(seq['labels']) > 0: + seq['labels'] = seq['labels'][..., 0] diff --git a/dataset_loaders/parallel_loader.py b/dataset_loaders/parallel_loader.py index 1151f83..a7ca897 100644 --- a/dataset_loaders/parallel_loader.py +++ b/dataset_loaders/parallel_loader.py @@ -598,9 +598,9 @@ def fetch_from_dataset(self, batch_to_load): batch_ret = {} # Create batches - for el in batch_to_load: + for prefix_and_fnames in batch_to_load: - if el is None: + if prefix_and_fnames is None: # The first element cannot be None, or we wouldn't have # this batch in the first place, so we can safely copy # the last element of the batch for each filename that @@ -611,45 +611,43 @@ def fetch_from_dataset(self, batch_to_load): continue # Load sequence, format is (s, 0, 1, c) - ret = self.load_sequence(el) - assert all(el in ret.keys() + seq = self.load_sequence(prefix_and_fnames) + + # Verify consistency + assert all(el in seq.keys() for el in ('data', 'labels', 'filenames', 'subset')), ( - 'Keys: {}'.format(ret.keys())) + 'Keys: {}'.format(seq.keys())) assert all(isinstance(el, np.ndarray) - for el in (ret['data'], ret['labels'])) - 
raw_data = ret['data'].copy() - seq_x, seq_y = ret['data'], ret['labels'] + for el in (seq['data'], seq['labels'])) + + seq['raw_data'] = seq['data'].copy() # Per-image normalization + axis = tuple(range(seq['data'].ndim - 1)) if self.remove_per_img_mean: - seq_x -= seq_x.mean(axis=tuple(range(seq_x.ndim - 1)), - keepdims=True) + seq['data'] -= seq['data'].mean(axis=axis, keepdims=True) if self.divide_by_per_img_std: - seq_x /= seq_x.std(axis=tuple(range(seq_x.ndim - 1)), - keepdims=True) + seq['data'] /= seq['data'].std(axis=axis, keepdims=True) # Dataset statistics normalization if self.remove_mean: - seq_x -= getattr(self, 'mean', 0) + seq['data'] -= getattr(self, 'mean', 0) if self.divide_by_std: - seq_x /= getattr(self, 'std', 1) + seq['data'] /= getattr(self, 'std', 1) # Make sure data is in 4D - if seq_x.ndim == 3: - seq_x = seq_x[np.newaxis, ...] - raw_data = raw_data[np.newaxis, ...] - assert seq_x.ndim == 4 + if seq['data'].ndim == 3: + seq['data'] = seq['data'][np.newaxis, ...] + seq['raw_data'] = seq['raw_data'][np.newaxis, ...] + assert seq['data'].ndim == 4 # and labels in 3D if self.set_has_GT: - if seq_y.ndim == 2: - seq_y = seq_y[np.newaxis, ...] - assert seq_y.ndim == 3 + if seq['labels'].ndim == 2: + seq['labels'] = seq['labels'][np.newaxis, ...] 
+ assert seq['labels'].ndim == 3 # Perform data augmentation, if needed - seq_x, seq_y = random_transform( - seq_x, seq_y, - nclasses=self.nclasses, - void_label=self.void_labels, - **self.data_augm_kwargs) + random_transform(self, seq, prefix_and_fnames, + **self.data_augm_kwargs) if self.set_has_GT and self._void_labels != []: # Map all void classes to non_void_nclasses and shift the other @@ -663,52 +661,56 @@ def fetch_from_dataset(self, batch_to_load): # Apply the mapping tmp_class = (-1 if not hasattr(self, 'GTclasses') else max(self.GTclasses) + 1) - seq_y[seq_y == self.non_void_nclasses] = tmp_class + loc = seq['labels'] == self.non_void_nclasses + seq['labels'][loc] = tmp_class for i in sorted(mapping.keys()): if i == self.non_void_nclasses: continue - seq_y[seq_y == i] = mapping[i] + seq['labels'][seq['labels'] == i] = mapping[i] try: - seq_y[seq_y == tmp_class] = mapping[self.non_void_nclasses] + loc = seq['labels'] == tmp_class + seq['labels'][loc] = mapping[self.non_void_nclasses] except KeyError: # none of the original classes was self.non_void_nclasses pass - # Transform targets seq_y to one hot code if return_one_hot + # Transform targets seq['labels'] to one hot code if return_one_hot # is True if self.set_has_GT and self.return_one_hot: nc = (self.non_void_nclasses if self._void_labels == [] else self.non_void_nclasses + 1) - sh = seq_y.shape - seq_y = seq_y.flatten() - seq_y_hot = np.zeros((seq_y.shape[0], nc), - dtype='int32') - seq_y = seq_y.astype('int32') - seq_y_hot[range(seq_y.shape[0]), seq_y] = 1 + sh = seq['labels'].shape + seq_y_flat = seq['labels'].flatten() + seq_y_hot = np.zeros((seq_y_flat.shape[0], nc), dtype='int32') + seq_y_flat = seq_y_flat.astype('int32') + seq_y_hot[range(seq_y_flat.shape[0]), seq_y_flat] = 1 seq_y_hot = seq_y_hot.reshape(sh + (nc,)) - seq_y = seq_y_hot + seq['labels'] = seq_y_hot # Dimshuffle if return_01c is False if not self.return_01c: # s,0,1,c --> s,c,0,1 - seq_x = seq_x.transpose([0, 3, 1, 2]) + 
seq['data'] = seq['data'].transpose([0, 3, 1, 2]) if self.set_has_GT and self.return_one_hot: - seq_y = seq_y.transpose([0, 3, 1, 2]) - raw_data = raw_data.transpose([0, 3, 1, 2]) + seq['labels'] = seq['labels'].transpose([0, 3, 1, 2]) + seq['raw_data'] = seq['raw_data'].transpose([0, 3, 1, 2]) # Return 4D images if not self.return_sequence: - seq_x = seq_x[0, ...] + seq['data'] = seq['data'][0, ...] if self.set_has_GT: - seq_y = seq_y[0, ...] - raw_data = raw_data[0, ...] + seq['labels'] = seq['labels'][0, ...] + seq['raw_data'] = seq['raw_data'][0, ...] if self.return_0_255: - seq_x = (seq_x * 255).astype('uint8') - ret['data'], ret['labels'] = seq_x, seq_y - ret['raw_data'] = raw_data + seq['data'] = (seq['data'] * 255).astype('uint8') + + # Make sure we are updating the seq array with all the + # modifications + seq['data'], seq['labels'] = seq['data'], seq['labels'] + # Append the data of this batch to the minibatch array - for k, v in ret.iteritems(): + for k, v in seq.iteritems(): batch_ret.setdefault(k, []).append(v) for k, v in batch_ret.iteritems(): From cd2f42879f6b8d8eca203b0039d9d4d9a1d5e9e6 Mon Sep 17 00:00:00 2001 From: Francesco Visin Date: Mon, 5 Jun 2017 15:23:25 +0200 Subject: [PATCH 2/6] Improve optical flow to load/store from disk * Allow to load OF from disk from .npy files * Compute the OF at run time if missing (only Farneback available ATM) * Add parameter to select the OF type * Add parameter to select whether to return OF as RGB or displacement --- README.md | 16 ++ dataset_loaders/data_augmentation.py | 220 +++++++++++++++++++-------- dataset_loaders/parallel_loader.py | 1 + 3 files changed, 177 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index d0b9f4b..3b8d3e3 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,19 @@ If you use this code, please cite: https://gist.github.com/fvisin/7104500ae8b33c3b65798d5d2707ce6c#file-dataset_loaders-bib)) Check the full documentation on: http://dataset_loaders.readthedocs.io +### 
Optical flow +The dataset loaders can optionally load from disk, or in some cases compute, +the optical flow associated to the video sequences. To do so it looks for a +file in `<dataset_path>/OF/<type>/<prefix>/<filename>.npy` where prefix is the +name of the subset (or video) as returned by get_names(). If the file is +missing it will try to compute the optical flow for the entire dataset once and +store it on disk. + +At the moment the only optical flow algorithm supported to this end is the +Farneback (requires OpenCV installed, choose 'Farn' as type), but you can +easily pre-compute the optical flow with your preferred algorithm and then load +it via the dataset loaders. An example code for a few algorithms is provided +[here](https://gist.github.com/marcociccone/593638e932a48df7cfd0afe71052ef1d). +NO SUPPORT WILL BE PROVIDED FOR THIS CODE OR ANY OTHER OPTICAL FLOW CODE NOT +DIRECTLY INTEGRATED IN THIS FRAMEWORK. + diff --git a/dataset_loaders/data_augmentation.py b/dataset_loaders/data_augmentation.py index 574b4a6..229ce00 100644 --- a/dataset_loaders/data_augmentation.py +++ b/dataset_loaders/data_augmentation.py @@ -1,6 +1,8 @@ # Based on # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py import os +import shutil +import warnings import numpy as np import scipy.misc @@ -9,44 +11,59 @@ from skimage import img_as_float -def optical_flow(seq, rows_idx, cols_idx, chan_idx, return_rgb=False): - '''Optical flow +def farn_optical_flow(dataset): + '''Farneback optical flow Takes a 4D array of sequences and returns a 4D array with an RGB optical flow image for each frame in the input''' import cv2 - if seq.ndim != 4: + warnings.warn('Farneback optical flow not stored on disk. It will now be ' + 'computed on the whole dataset and stored on disk.' 
+ 'Time to sit back and get a coffee!') + + # Create a copy of the dataset to iterate on + dataset = dataset.__class__(batch_size=1, + return_01c=True, + return_0_255=True, + shuffle_at_each_epoch=False, + infinite_iterator=False) + + ret = dataset.next() + frame0 = ret['data'] + prefix0 = ret['subset'][0] + if frame0.ndim != 4: raise RuntimeError('Optical flow expected 4 dimensions, got %d' % - seq.ndim) - seq = seq.copy() - seq = (seq * 255).astype('uint8') - # Reshape to channel last: (b*seq, 0, 1, ch) if seq - pattern = [el for el in range(seq.ndim) - if el not in (rows_idx, cols_idx, chan_idx)] - pattern += [rows_idx, cols_idx, chan_idx] - inv_pattern = [pattern.index(el) for el in range(seq.ndim)] - seq = seq.transpose(pattern) - if seq.shape[0] == 1: - raise RuntimeError('Optical flow needs a sequence longer than 1 ' - 'to work') - seq = seq[..., ::-1] # Go BGR for OpenCV - - frame1 = seq[0] - if return_rgb: - flow_seq = np.zeros_like(seq) - hsv = np.zeros_like(frame1) - else: - sh = list(seq.shape) - sh[-1] = 2 - flow_seq = np.zeros(sh) - - frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) # Go to gray + frame0.ndim) + frame0 = frame0[0, ..., ::-1] # go BGR for OpenCV + remove batch dim + frame0 = cv2.cvtColor(frame0, cv2.COLOR_BGR2GRAY) # Go gray flow = None - for i, frame2 in enumerate(seq[1:]): - frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY) # Go to gray - flow = cv2.calcOpticalFlowFarneback(prev=frame1, - next=frame2, + of_path = os.path.join(dataset.path, 'OF', 'Farn') + of_shared_path = os.path.join(dataset.shared_path, 'OF', 'Farn') + + for ret in dataset: + frame1 = ret['data'] + filename1 = ret['filenames'][0, 0] + # Strip extension, if any + filename1 = filename1[:-4] + '.'.join(filename1[-4:].split('.')[:-1]) + prefix1 = ret['subset'][0] + + if frame1.ndim != 4: + raise RuntimeError('Optical flow expected 4 dimensions, got %d' % + frame1.ndim) + + frame1 = frame1[0, ..., ::-1] # go BGR for OpenCV + remove batch dim + frame1 = 
cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) # Go gray + + if prefix1 != prefix0: + # First frame of a new subset + frame0 = frame1 + prefix0 = prefix1 + continue + + # Compute displacement + flow = cv2.calcOpticalFlowFarneback(prev=frame0, + next=frame1, pyr_scale=0.5, levels=3, winsize=10, @@ -55,24 +72,22 @@ def optical_flow(seq, rows_idx, cols_idx, chan_idx, return_rgb=False): poly_sigma=1.1, flags=0, flow=flow) - mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1], - angleInDegrees=True) - # normalize between 0 and 255 - ang = ang / 360 * 255 - if return_rgb: - hsv[..., 0] = ang - hsv[..., 1] = 255 - hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) - rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) - flow_seq[i+1] = rgb - # Image.fromarray(rgb).show() - # cv2.imwrite('opticalfb.png', frame2) - # cv2.imwrite('opticalhsv.png', bgr) - else: - flow_seq[i+1] = np.stack((ang, mag), 2) - frame1 = frame2 - flow_seq = flow_seq.transpose(inv_pattern) - return flow_seq / 255. # return in [0, 1] + + # Save in the local path + if not os.path.exists(os.path.join(of_path, prefix1)): + os.makedirs(os.path.join(of_path, prefix1)) + # Save the flow as dy, dx + np.save(os.path.join(of_path, prefix1, filename1), flow[..., ::-1]) + # cv2.imwrite(os.path.join(of_path, prefix1, filename1 + '.png'), flow) + frame0 = frame1 + prefix0 = prefix1 + + # Store a copy in shared_path + # TODO there might be a race condition when multiple experiments are + # run and one checks for the existence of the shared path OF dir + # while this copy is happening. 
+ if of_path != of_shared_path: + shutil.copytree(of_path, of_shared_path) def my_label2rgb(labels, cmap, bglabel=None, bg_color=(0., 0., 0.)): @@ -347,10 +362,11 @@ def random_transform(dataset, warp_sigma=0.1, warp_grid_size=3, crop_size=None, - return_optical_flow=False, nclasses=None, gamma=0., gain=1., + return_optical_flow=None, + optical_flow_type='Farn', chan_idx=3, # No batch yet: (s, 0, 1, c) rows_idx=1, # No batch yet: (s, 0, 1, c) cols_idx=2, # No batch yet: (s, 0, 1, c) @@ -415,17 +431,24 @@ def random_transform(dataset, crop_size: tuple The size of crop to be applied to images and masks (after any other transformation). - return_optical_flow: bool - If not False a dense optical flow will be concatenated to the - end of the channel axis of the image. If True, angle and - magnitude will be returned, if set to 'rbg' an RGB representation - will be returned instead. Default: False. nclasses: int The number of classes of the dataset. gamma: float Controls gamma in Gamma correction. gain: float Controls gain in Gamma correction. + return_optical_flow: string + Either 'displacement' or 'rbg'. + If set, a dense optical flow will be retrieved from disk (or + computed when missing) and returned as a 'flow' key. + If 'displacement', the optical flow will be returned as a + two-dimensional array of (dx, dy) displacement. If 'rgb', a + three dimensional RGB array with values in [0, 255] will be + returned. Default: None. + optical_flow_type: string + Indicates the method used to generate the optical flow. The + optical flow is loaded from a specific directory based on this + type. chan_idx: int The index of the channel axis. 
rows_idx: int @@ -577,6 +600,78 @@ def random_transform(dataset, fill_mode=fill_mode, fill_constant=cval_mask, rows_idx=rows_idx, cols_idx=cols_idx)) + # Optical flow + if return_optical_flow: + return_optical_flow = return_optical_flow.lower() + if return_optical_flow not in ['rgb', 'displacement']: + raise RuntimeError('Unknown return_optical_flow value: %s' % + return_optical_flow) + if prefix_and_fnames is None: + raise RuntimeError('You should specify a list of prefixes ' + 'and filenames') + # Find the filename of the first frame of this prefix + first_frame_of_prefix = sorted(dataset.get_names()[seq['subset']])[0] + + of_base_path = os.path.join(dataset.path, 'OF', optical_flow_type) + if not os.path.isdir(of_base_path): + # The OF is not on disk: compute it and store it + if optical_flow_type != 'Farn': + raise RuntimeError('Unknown optical flow type: %s. For ' + 'optical_flow_type other than Farn ' + 'please run your own implementation ' + 'manually and save it in %s' % + optical_flow_type, of_base_path) + farn_optical_flow(dataset) # Compute and store on disk + + # Load the OF from disk + import skimage + flow = [] + for frame in prefix_and_fnames: + if frame[1] == first_frame_of_prefix: + # It's the first frame of the prefix, there is no + # previous frame to compute the OF with, return a blank one + of = np.zeros(sh[1:], seq['data'].dtype) + flow.append(of) + continue + + # Read from disk + of_path = os.path.join(of_base_path, frame[0], + frame[1].rstrip('.') + '.npy') + if os.path.exists(of_path): + of = np.load(of_path) + else: + raise RuntimeError('Optical flow not found for this ' + 'file: %s' % of_path) + + if return_optical_flow == 'rgb': + # of = of[..., ::-1] + + def cart2pol(x, y): + mag = np.sqrt(x**2 + y**2) + ang = np.arctan2(y, x) # note, in [-pi, pi] + return mag, ang + mag, ang = cart2pol(of[..., 0], of[..., 1]) + + # Normalize to [0, 1] + sh = of.shape[:2] + two_pi = 2 * np.pi + ang = (ang + two_pi) % two_pi / two_pi + mag = mag - 
mag.min() + mag /= np.float(mag.max()) + + # Convert to RGB [0, 1] + hsv = np.ones((sh[0], sh[1], 3)) + hsv[..., 0] = ang + hsv[..., 2] = mag + of = skimage.color.hsv2rgb(hsv) # HSV --> RGB [0, 1] + of = (of * 255).astype('uint8') + from PIL import Image + import ipdb; ipdb.set_trace() + Image.fromarray(of).show() + + flow.append(np.array(of)) + flow = np.array(flow) + # Crop # Expects axes with shape (..., 0, 1) # TODO: Add center crop @@ -613,6 +708,9 @@ def random_transform(dataset, seq['labels'] = seq['labels'].transpose(pattern) seq['labels'] = seq['labels'][..., top:top+crop[0], left:left+crop[1]] + if return_optical_flow: + flow = flow.transpose(pattern) + flow = flow[..., top:top+crop[0], left:left+crop[1]] # Padding if pad != [0, 0]: pad_pattern = ((0, 0),) * (seq['data'].ndim - 2) + ( @@ -621,16 +719,15 @@ def random_transform(dataset, seq['data'] = np.pad(seq['data'], pad_pattern, 'constant') seq['labels'] = np.pad(seq['labels'], pad_pattern, 'constant', constant_values=void_label) + if return_optical_flow: + flow = np.pad(flow, pad_pattern, 'constant') # pad with zeros # Reshape to original shape seq['data'] = seq['data'].transpose(inv_pattern) if seq['labels'] is not None and len(seq['labels']) > 0: seq['labels'] = seq['labels'].transpose(inv_pattern) - - if return_optical_flow: - flow = optical_flow(seq['data'], rows_idx, cols_idx, chan_idx, - return_rgb=return_optical_flow == 'rgb') - seq['data'] = np.concatenate((seq['data'], flow), axis=chan_idx) + if return_optical_flow: + flow = flow.transpose(inv_pattern) # Save augmented images if save_to_dir: @@ -644,3 +741,6 @@ def random_transform(dataset, # Undo extra dim if seq['labels'] is not None and len(seq['labels']) > 0: seq['labels'] = seq['labels'][..., 0] + + if return_optical_flow: + seq['flow'] = np.array(flow) diff --git a/dataset_loaders/parallel_loader.py b/dataset_loaders/parallel_loader.py index a7ca897..0bcb15e 100644 --- a/dataset_loaders/parallel_loader.py +++ 
b/dataset_loaders/parallel_loader.py @@ -214,6 +214,7 @@ def __init__(self, # Set default values for the data augmentation params if not specified default_data_augm_kwargs = { 'crop_size': None, + 'return_optical_flow': None, 'rotation_range': 0, 'width_shift_range': 0, 'height_shift_range': 0, From 1a5e620ae3c38f7e5a2271497b3051c0c1e1015e Mon Sep 17 00:00:00 2001 From: Francesco Lattari Date: Fri, 27 Oct 2017 15:30:28 +0200 Subject: [PATCH 3/6] Improved OF visualization * Better OF visualization, adapted from TransFlow --- dataset_loaders/data_augmentation.py | 152 +++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/dataset_loaders/data_augmentation.py b/dataset_loaders/data_augmentation.py index 229ce00..b599125 100644 --- a/dataset_loaders/data_augmentation.py +++ b/dataset_loaders/data_augmentation.py @@ -744,3 +744,155 @@ def cart2pol(x, y): if return_optical_flow: seq['flow'] = np.array(flow) + + +def flow2rgb(flow, frame=None, show_flow_vector_field=False): + ''' + Convert optical flow to RGB image + From: + https://github.com/stefanoalletto/TransFlow/blob/master/ + flowToColor.pyeadapted from + ''' + if len(flow.shape) != 3 or flow.shape[2] != 2: + raise ValueError('The flow should be an array (x, y, c) with c ' + 'containing the 2D displacement.') + + u = flow[:, :, 0] + v = flow[:, :, 1] + # print u.shape,v.shape + maxu = -999. + maxv = -999. + minu = 999. + minv = 999. + maxrad = -1. + + maxu = max(maxu, np.max(u)) + minu = max(minu, np.max(u)) + maxv = max(maxv, np.max(v)) + minv = max(minv, np.max(v)) + rad = np.sqrt((u ** 2. + v ** 2.)) + maxrad = max(maxrad, np.max(rad)) + u = u / (maxrad + 1e-5) + v = v / (maxrad + 1e-5) + if show_flow_vector_field: + mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1]) + img = drawVectorField(frame, mag, rad) + else: + img = computeColor(u, v) + img = img / 255. + # % unknown flow + # IDX = np.repmat(idxUnknown, np.array(np.hstack((1., 1., 3.)))) + # img[int(IDX)-1] = 0. + # return img/255. 
+ return img + +def drawVectorField(frame, mag, rad): + magnitude_hsv = np.zeros(shape=rad.shape + tuple([3])) + magnitude_hsv[..., 2] = np.clip(mag, 0, 10) / 10. + # magnitude_rgb[..., indice / numero di righe + # for i in range(480): + # magnitude_hsv[i, :, 2] = i / 480. + # magnitude_rgb[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) + # magnitude_rgb[..., 2] = np.expand_dims(mag, axis=-1)[..., 0] + magnitude_rgb = cv2.cvtColor(np.uint8(magnitude_hsv * 255), + cv2.COLOR_HSV2RGB) + magnitude_rgb[..., 1] = 255 + + white_background = np.ones_like(frame) * 255 + cv2.addWeighted(frame, 0.4, white_background, 0.6, 0, + white_background) + + height = rad.shape[0] + width = rad.shape[1] + + divisor = 12 + vector_length = 10 + + for i in range(height / divisor): + for j in range(width / divisor): + y1 = i * divisor + x1 = j * divisor + vector_length = magnitude_hsv[y1, x1, 2] * 10 + dy = vector_length * np.sin(rad[y1, x1]) + dx = vector_length * np.cos(rad[y1, x1]) + x2 = int(x1 + dx) + y2 = int(y1 + dy) + x2 = np.clip(x2, 0, width) + y2 = np.clip(y2, 0, height) + arrow_color = magnitude_rgb[y1, x1].tolist() + white_background = cv2.arrowedLine( + white_background, (x1, y1), (x2, y2), + arrow_color, 1, tipLength=0.4) + return white_background + +def computeColor(u, v): + img = np.zeros((u.shape[0], u.shape[1], 3)) + # nanIdx = np.logical_or(np.isnan(u), np.isnan(v)) + # u[int(nanIdx)-1] = 0. + # v[int(nanIdx)-1] = 0. + colorwheel, ncols = makeColorwheel() + rad = np.sqrt((u ** 2. + v ** 2.)) + a = np.arctan2((-v), (-u)) / np.pi + fk = np.dot((a + 1.) / 2., ncols - 1.) + # % -1~1 maped to 1~ncols + k0 = np.floor(fk).astype(np.int32) + # % 1, 2, ..., ncols + k1 = k0 + 1 + k1[k1 == ncols] = 1 + f = fk - k0 + + for i in np.arange(colorwheel.shape[-1]): + tmp = colorwheel[:, i] + col0 = tmp[k0] / 255. + col1 = tmp[k1] / 255. + col = (1. - f) * col0 + f * col1 + idx = rad <= 1. + col[idx] = 1. - rad[idx] * (1. 
- col[idx]) + # % increase saturation with radius + col[rad > 1] = col[rad > 1] * 0.75 + # % out of range + img[:, :, i] = np.floor(255. * col) + return img + +def makeColorwheel(): + + RY = 15 + YG = 6 + GC = 4 + CB = 11 + BM = 13 + MR = 6 + ncols = RY+YG+GC+CB+BM+MR + colorwheel = np.zeros((int(ncols), 3)) + # % r g b + col = 0 + # %RY + colorwheel[0:RY, 0] = 255. + colorwheel[0:RY, 1] = np.floor(255. * np.arange(0., RY) / RY) + col = col + RY + # %YG + colorwheel[col:col+YG, 0] = 255. - np.floor( + 255. * np.arange(0., YG) / YG) + colorwheel[col:col+YG, 1] = 255. + col = col + YG + # %GC + colorwheel[col+0:col+GC, 1] = 255. + colorwheel[col+0:col+GC, 2] = np.floor(255. * np.arange(0., GC) / + GC) + col = col + GC + # %CB + colorwheel[col+0:col+CB, 1] = 255. - np.floor( + 255. * np.arange(0., CB) / CB) + colorwheel[col+0:col+CB, 2] = 255. + col = col + CB + # %BM + colorwheel[col+0:col+BM, 2] = 255. + colorwheel[col+0:col+BM, 0] = np.floor(255. * np.arange(0., BM) / + BM) + col = col + BM + # %MR + colorwheel[col+0:col+MR, 2] = 255. - np.floor( + 255. * np.arange(0., MR) / MR) + colorwheel[col+0:col+MR, 0] = 255. 
+ return colorwheel, ncols + From 439eda9f52922afb146895660a0ce16c510b5f98 Mon Sep 17 00:00:00 2001 From: Francesco Visin Date: Fri, 27 Oct 2017 16:51:19 +0200 Subject: [PATCH 4/6] Cleanup and comment the new OF visualization code --- dataset_loaders/data_augmentation.py | 74 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/dataset_loaders/data_augmentation.py b/dataset_loaders/data_augmentation.py index b599125..f96308a 100644 --- a/dataset_loaders/data_augmentation.py +++ b/dataset_loaders/data_augmentation.py @@ -746,24 +746,44 @@ def cart2pol(x, y): seq['flow'] = np.array(flow) -def flow2rgb(flow, frame=None, show_flow_vector_field=False): +def cart2polar(x, y): + '''Roughly equivalent to cv2.cartToPolar''' + mag = np.sqrt(x**2 + y**2) + ang = np.arctan2(y, x) # note, in [-pi, pi] + return mag, ang + + +def flow2rgb(flow, frame=None, return_vec_field=False, return_0_255=True): ''' Convert optical flow to RGB image From: https://github.com/stefanoalletto/TransFlow/blob/master/ - flowToColor.pyeadapted from + flowToColor.py + + Parameters + ---------- + flow: ndarray + A 3D array with the X, Y displacement per pixel + frame: ndarray + An image, used to overlay the vector field if return_vec_field + is True + return_vec_field: bool + If True an image with an overlay of the optical flow vector + field will be returned. Otherwise an RGB image representation of + the optical flow will be returned. Default: False + return_0_255: bool + If True the returned RGB optical flow will be in [0, 255], + otherwise in [0, 1]. Ignored if return_vec_field is True. ''' if len(flow.shape) != 3 or flow.shape[2] != 2: raise ValueError('The flow should be an array (x, y, c) with c ' - 'containing the 2D displacement.') + 'containing the 2D XY-displacement.') u = flow[:, :, 0] v = flow[:, :, 1] - # print u.shape,v.shape - maxu = -999. - maxv = -999. - minu = 999. - minv = 999. + + maxu = maxv = -999. + minu = minv = 999. maxrad = -1. 
maxu = max(maxu, np.max(u)) @@ -774,33 +794,28 @@ def flow2rgb(flow, frame=None, show_flow_vector_field=False): maxrad = max(maxrad, np.max(rad)) u = u / (maxrad + 1e-5) v = v / (maxrad + 1e-5) - if show_flow_vector_field: - mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1]) + if return_vec_field: + mag, _ = cart2polar(flow[..., 0], flow[..., 1]) img = drawVectorField(frame, mag, rad) else: img = computeColor(u, v) - img = img / 255. - # % unknown flow - # IDX = np.repmat(idxUnknown, np.array(np.hstack((1., 1., 3.)))) - # img[int(IDX)-1] = 0. - # return img/255. + if return_0_255: + img = img.astype('uint8') + else: + img = img / 255. return img + def drawVectorField(frame, mag, rad): + import cv2 magnitude_hsv = np.zeros(shape=rad.shape + tuple([3])) magnitude_hsv[..., 2] = np.clip(mag, 0, 10) / 10. - # magnitude_rgb[..., indice / numero di righe - # for i in range(480): - # magnitude_hsv[i, :, 2] = i / 480. - # magnitude_rgb[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) - # magnitude_rgb[..., 2] = np.expand_dims(mag, axis=-1)[..., 0] magnitude_rgb = cv2.cvtColor(np.uint8(magnitude_hsv * 255), cv2.COLOR_HSV2RGB) magnitude_rgb[..., 1] = 255 white_background = np.ones_like(frame) * 255 - cv2.addWeighted(frame, 0.4, white_background, 0.6, 0, - white_background) + cv2.addWeighted(frame, 0.4, white_background, 0.6, 0, white_background) height = rad.shape[0] width = rad.shape[1] @@ -825,6 +840,7 @@ def drawVectorField(frame, mag, rad): arrow_color, 1, tipLength=0.4) return white_background + def computeColor(u, v): img = np.zeros((u.shape[0], u.shape[1], 3)) # nanIdx = np.logical_or(np.isnan(u), np.isnan(v)) @@ -854,8 +870,8 @@ def computeColor(u, v): img[:, :, i] = np.floor(255. * col) return img -def makeColorwheel(): +def makeColorwheel(): RY = 15 YG = 6 GC = 4 @@ -871,8 +887,7 @@ def makeColorwheel(): colorwheel[0:RY, 1] = np.floor(255. * np.arange(0., RY) / RY) col = col + RY # %YG - colorwheel[col:col+YG, 0] = 255. - np.floor( - 255. 
* np.arange(0., YG) / YG) + colorwheel[col:col+YG, 0] = 255. - np.floor(255. * np.arange(0., YG) / YG) colorwheel[col:col+YG, 1] = 255. col = col + YG # %GC @@ -881,8 +896,8 @@ def makeColorwheel(): GC) col = col + GC # %CB - colorwheel[col+0:col+CB, 1] = 255. - np.floor( - 255. * np.arange(0., CB) / CB) + colorwheel[col+0:col+CB, 1] = 255. - np.floor(255. * np.arange(0., CB) / + CB) colorwheel[col+0:col+CB, 2] = 255. col = col + CB # %BM @@ -891,8 +906,7 @@ def makeColorwheel(): BM) col = col + BM # %MR - colorwheel[col+0:col+MR, 2] = 255. - np.floor( - 255. * np.arange(0., MR) / MR) + colorwheel[col+0:col+MR, 2] = 255. - np.floor(255. * np.arange(0., MR) / + MR) colorwheel[col+0:col+MR, 0] = 255. return colorwheel, ncols - From 863e8ecb7b636997cd62cd417530caa02692ac6f Mon Sep 17 00:00:00 2001 From: Francesco Visin Date: Fri, 27 Oct 2017 17:51:34 +0200 Subject: [PATCH 5/6] Replace numpy OF visualization with advanced one --- dataset_loaders/data_augmentation.py | 66 ++++++++++++++-------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/dataset_loaders/data_augmentation.py b/dataset_loaders/data_augmentation.py index f96308a..7e82b94 100644 --- a/dataset_loaders/data_augmentation.py +++ b/dataset_loaders/data_augmentation.py @@ -624,7 +624,6 @@ def random_transform(dataset, farn_optical_flow(dataset) # Compute and store on disk # Load the OF from disk - import skimage flow = [] for frame in prefix_and_fnames: if frame[1] == first_frame_of_prefix: @@ -634,41 +633,17 @@ def random_transform(dataset, flow.append(of) continue - # Read from disk + # Read the OF from disk of_path = os.path.join(of_base_path, frame[0], frame[1].rstrip('.') + '.npy') - if os.path.exists(of_path): - of = np.load(of_path) - else: - raise RuntimeError('Optical flow not found for this ' - 'file: %s' % of_path) + if not os.path.exists(of_path): + raise RuntimeError('Optical flow not found for this file: %s' % + of_path) + of = np.load(of_path) if return_optical_flow == 
'rgb': - # of = of[..., ::-1] - - def cart2pol(x, y): - mag = np.sqrt(x**2 + y**2) - ang = np.arctan2(y, x) # note, in [-pi, pi] - return mag, ang - mag, ang = cart2pol(of[..., 0], of[..., 1]) - - # Normalize to [0, 1] - sh = of.shape[:2] - two_pi = 2 * np.pi - ang = (ang + two_pi) % two_pi / two_pi - mag = mag - mag.min() - mag /= np.float(mag.max()) - - # Convert to RGB [0, 1] - hsv = np.ones((sh[0], sh[1], 3)) - hsv[..., 0] = ang - hsv[..., 2] = mag - of = skimage.color.hsv2rgb(hsv) # HSV --> RGB [0, 1] - of = (of * 255).astype('uint8') - from PIL import Image - import ipdb; ipdb.set_trace() - Image.fromarray(of).show() - + of = of[..., ::-1] # go (dx, dy) as expected by openCV + of = flow2rgb(of, return_0_255=True) flow.append(np.array(of)) flow = np.array(flow) @@ -753,6 +728,33 @@ def cart2polar(x, y): return mag, ang +def flow2rgb_np(flow): + ''' + Convert optical flow to RGB image + + Parameters + ---------- + flow: ndarray + A 3D array with the X, Y displacement per pixel + ''' + import skimage + mag, ang = cart2polar(flow[..., 0], flow[..., 1]) + + # Normalize to [0, 1] + sh = flow.shape[:2] + two_pi = 2 * np.pi + ang = (ang + two_pi) % two_pi / two_pi + mag = mag - mag.min() + mag /= np.float(mag.max()) + + # Convert to RGB [0, 1] + hsv = np.ones((sh[0], sh[1], 3)) + hsv[..., 0] = ang + hsv[..., 2] = mag + of = skimage.color.hsv2rgb(hsv) # HSV --> RGB [0, 1] + of = (of * 255).astype('uint8') + + def flow2rgb(flow, frame=None, return_vec_field=False, return_0_255=True): ''' Convert optical flow to RGB image From 032e7379e4be41fc360f66de5952e7ad02064b69 Mon Sep 17 00:00:00 2001 From: Francesco Visin Date: Mon, 30 Oct 2017 12:13:58 +0100 Subject: [PATCH 6/6] Replace gist with new OF code repo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3b8d3e3..42700d1 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ At the moment the only optical flow algorithm supported to this end is the Farneback 
(requires openCV installed, choose 'Farn' as type), but you can easily pre-compute the optical flow with your preferred algorithm and then load it via the dataset loaders. An example code for a few algorithms is provided -[here](https://gist.github.com/marcociccone/593638e932a48df7cfd0afe71052ef1d). +[here](https://github.com/marcociccone/opencv_of_gpu). NO SUPPORT WILL BE PROVIDED FOR THIS CODE OR ANY OTHER OPTICAL FLOW CODE NOT DIRECTLY INTEGRATED IN THIS FRAMEWORK.