From d0eff67bec16062e2e48c8d1947a532d1518526a Mon Sep 17 00:00:00 2001 From: Felix Dittrich Date: Fri, 25 Oct 2024 20:18:54 +0200 Subject: [PATCH] fix tf test and formatting (#1762) --- .pre-commit-config.yaml | 4 ++-- doctr/datasets/datasets/pytorch.py | 12 ++++++------ doctr/datasets/datasets/tensorflow.py | 12 ++++++------ doctr/models/predictor/pytorch.py | 6 +++--- doctr/models/predictor/tensorflow.py | 6 +++--- doctr/transforms/functional/tensorflow.py | 2 +- doctr/utils/data.py | 2 +- doctr/utils/metrics.py | 4 ++-- references/classification/train_pytorch_character.py | 4 ++-- .../classification/train_pytorch_orientation.py | 4 ++-- .../classification/train_tensorflow_character.py | 6 ++---- .../classification/train_tensorflow_orientation.py | 6 ++---- references/detection/evaluate_pytorch.py | 5 ++--- references/detection/evaluate_tensorflow.py | 5 ++--- references/detection/train_pytorch.py | 4 ++-- references/detection/train_tensorflow.py | 6 ++---- references/recognition/evaluate_pytorch.py | 2 +- references/recognition/evaluate_tensorflow.py | 2 +- references/recognition/train_pytorch.py | 4 ++-- references/recognition/train_pytorch_ddp.py | 6 ++---- references/recognition/train_tensorflow.py | 6 ++---- tests/pytorch/test_transforms_pt.py | 3 +-- tests/tensorflow/test_transforms_tf.py | 2 +- 23 files changed, 50 insertions(+), 63 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e66bb443e..bbbdbdf2b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-ast - id: check-yaml @@ -16,7 +16,7 @@ repos: - id: no-commit-to-branch args: ['--branch', 'main'] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.2 + rev: v0.7.1 hooks: - id: ruff args: [ --fix ] diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py index bd4d840168..c24b1c2a8e 100644 --- a/doctr/datasets/datasets/pytorch.py +++ b/doctr/datasets/datasets/pytorch.py @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]: assert "labels" in target, "Target should contain 'labels' key" elif isinstance(target, tuple): assert len(target) == 2 - assert isinstance(target[0], str) or isinstance( - target[0], np.ndarray - ), "first element of the tuple should be a string or a numpy array" + assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), ( + "first element of the tuple should be a string or a numpy array" + ) assert isinstance(target[1], list), "second element of the tuple should be a list" else: - assert isinstance(target, str) or isinstance( - target, np.ndarray - ), "Target should be a string or a numpy array" + assert isinstance(target, str) or isinstance(target, np.ndarray), ( + "Target should be a string or a numpy array" + ) # Read image img = ( diff --git a/doctr/datasets/datasets/tensorflow.py b/doctr/datasets/datasets/tensorflow.py index 86b7b79289..6e19ca2109 100644 --- a/doctr/datasets/datasets/tensorflow.py +++ b/doctr/datasets/datasets/tensorflow.py @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]: assert "labels" in target, "Target should contain 'labels' key" elif isinstance(target, tuple): assert len(target) == 2 - assert isinstance(target[0], str) or isinstance( - target[0], np.ndarray - ), "first element of the tuple should be a string or a numpy array" + assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), ( + "first element of the tuple should be a string or a numpy array" + ) assert isinstance(target[1], list), "second element of the tuple should be a list" else: - assert isinstance(target, str) or isinstance( - target, np.ndarray - ), "Target should be a string or a numpy array" + assert isinstance(target, str) or isinstance(target, np.ndarray), ( + "Target should be a string or a numpy array" + ) # Read image img = ( diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index 326b89e5ff..b47a71449d 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -103,9 +103,9 @@ def forward( # Forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) - assert all( - len(loc_pred) == 1 for loc_pred in loc_preds - ), "Detection Model in ocr_predictor should output only one class" + assert all(len(loc_pred) == 1 for loc_pred in loc_preds), ( + "Detection Model in ocr_predictor should output only one class" + ) loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds] # Detach objectness scores from loc_preds diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 8f58062fd5..1392943bc4 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -103,9 +103,9 @@ def __call__( # forward again to get predictions on straight pages loc_preds_dict = self.det_predictor(pages, **kwargs) # type: ignore[assignment] - assert all( - len(loc_pred) == 1 for loc_pred in loc_preds_dict - ), "Detection Model in ocr_predictor should output only one class" + assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), ( + "Detection Model in ocr_predictor should output only one class" + ) loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] # type: ignore[union-attr] # Detach objectness scores from loc_preds loc_preds, objectness_scores = detach_scores(loc_preds) diff --git a/doctr/transforms/functional/tensorflow.py b/doctr/transforms/functional/tensorflow.py index 27336089b4..1fbc05096e 100644 --- a/doctr/transforms/functional/tensorflow.py +++ b/doctr/transforms/functional/tensorflow.py @@ -140,7 +140,7 @@ def rotate_sample( rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[1] rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[0] - return rotated_img, np.clip(rotated_geoms, 0, 1) + return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1) def crop_detection( diff --git a/doctr/utils/data.py b/doctr/utils/data.py index 7aec7720d8..aca34801bb 100644 --- a/doctr/utils/data.py +++ b/doctr/utils/data.py @@ -112,7 +112,7 @@ def download_from_url( except (urllib.error.URLError, IOError) as e: if url[:5] == "https": url = url.replace("https:", "http:") - print("Failed download. Trying https -> http instead." f" Downloading {url} to {file_path}") + print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}") _urlretrieve(url, file_path) else: raise e diff --git a/doctr/utils/metrics.py b/doctr/utils/metrics.py index 6947298ede..4fe3d59ebe 100644 --- a/doctr/utils/metrics.py +++ b/doctr/utils/metrics.py @@ -392,7 +392,7 @@ def update( """ if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels): raise AssertionError( - "there should be the same number of boxes and string both for the ground truth " "and the predictions" + "there should be the same number of boxes and string both for the ground truth and the predictions" ) # Compute IoU @@ -525,7 +525,7 @@ def update( """ if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]: raise AssertionError( - "there should be the same number of boxes and string both for the ground truth " "and the predictions" + "there should be the same number of boxes and string both for the ground truth and the predictions" ) # Compute IoU diff --git a/references/classification/train_pytorch_character.py b/references/classification/train_pytorch_character.py index 9e1acb2696..14bb749066 100644 --- a/references/classification/train_pytorch_character.py +++ b/references/classification/train_pytorch_character.py @@ -208,7 +208,7 @@ def main(args): sampler=SequentialSampler(val_set), pin_memory=torch.cuda.is_available(), ) - print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)") + print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)") batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)) @@ -272,7 +272,7 @@ def main(args): sampler=RandomSampler(train_set), pin_memory=torch.cuda.is_available(), ) - print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)") + print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)") if args.show_samples: x, target = next(iter(train_loader)) diff --git a/references/classification/train_pytorch_orientation.py b/references/classification/train_pytorch_orientation.py index 8324f0aa37..d3a8a57fa2 100644 --- a/references/classification/train_pytorch_orientation.py +++ b/references/classification/train_pytorch_orientation.py @@ -216,7 +216,7 @@ def main(args): sampler=SequentialSampler(val_set), pin_memory=torch.cuda.is_available(), ) - print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)") + print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)") batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)) @@ -278,7 +278,7 @@ def main(args): sampler=RandomSampler(train_set), pin_memory=torch.cuda.is_available(), ) - print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)") + print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)") if args.show_samples: x, target = next(iter(train_loader)) diff --git a/references/classification/train_tensorflow_character.py b/references/classification/train_tensorflow_character.py index 0b1b648d93..3049e60ecd 100644 --- a/references/classification/train_tensorflow_character.py +++ b/references/classification/train_tensorflow_character.py @@ -170,8 +170,7 @@ def main(args): collate_fn=collate_fn, ) print( - f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " - f"{val_loader.num_batches} batches)" + f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)" ) # Load doctr model @@ -226,8 +225,7 @@ def main(args): collate_fn=collate_fn, ) print( - f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " - f"{train_loader.num_batches} batches)" + f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)" ) if args.show_samples: diff --git a/references/classification/train_tensorflow_orientation.py b/references/classification/train_tensorflow_orientation.py index 297a5674f4..05ae7fce96 100644 --- a/references/classification/train_tensorflow_orientation.py +++ b/references/classification/train_tensorflow_orientation.py @@ -181,8 +181,7 @@ def main(args): collate_fn=collate_fn, ) print( - f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " - f"{val_loader.num_batches} batches)" + f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)" ) # Load doctr model @@ -236,8 +235,7 @@ def main(args): collate_fn=collate_fn, ) print( - f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " - f"{train_loader.num_batches} batches)" + f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)" ) if args.show_samples: diff --git a/references/detection/evaluate_pytorch.py b/references/detection/evaluate_pytorch.py index 10b20e40cc..0cc0da1a7c 100644 --- a/references/detection/evaluate_pytorch.py +++ b/references/detection/evaluate_pytorch.py @@ -112,7 +112,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=ds.collate_fn, ) - print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)") + print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)") batch_transforms = Normalize(mean=mean, std=std) @@ -143,8 +143,7 @@ def main(args): print("Running evaluation") val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric, amp=args.amp) print( - f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | " - f"Mean IoU: {mean_iou:.2%})" + f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})" ) diff --git a/references/detection/evaluate_tensorflow.py b/references/detection/evaluate_tensorflow.py index a2c5bbe49c..2a7d32425f 100644 --- a/references/detection/evaluate_tensorflow.py +++ b/references/detection/evaluate_tensorflow.py @@ -112,7 +112,7 @@ def main(args): drop_last=False, shuffle=False, ) - print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)") + print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)") batch_transforms = T.Normalize(mean=mean, std=std) @@ -122,8 +122,7 @@ def main(args): print("Running evaluation") val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric) print( - f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | " - f"Mean IoU: {mean_iou:.2%})" + f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})" ) diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py index 8682ddb60f..8d1bfa4499 100644 --- a/references/detection/train_pytorch.py +++ b/references/detection/train_pytorch.py @@ -209,7 +209,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=val_set.collate_fn, ) - print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)") + print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)") with open(os.path.join(args.val_path, "labels.json"), "rb") as f: val_hash = hashlib.sha256(f.read()).hexdigest() @@ -316,7 +316,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=train_set.collate_fn, ) - print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)") + print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)") with open(os.path.join(args.train_path, "labels.json"), "rb") as f: train_hash = hashlib.sha256(f.read()).hexdigest() diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py index f054879e8f..561447c5f7 100644 --- a/references/detection/train_tensorflow.py +++ b/references/detection/train_tensorflow.py @@ -173,8 +173,7 @@ def main(args): drop_last=False, ) print( - f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " - f"{val_loader.num_batches} batches)" + f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)" ) with open(os.path.join(args.val_path, "labels.json"), "rb") as f: val_hash = hashlib.sha256(f.read()).hexdigest() @@ -269,8 +268,7 @@ def main(args): drop_last=True, ) print( - f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " - f"{train_loader.num_batches} batches)" + f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)" ) with open(os.path.join(args.train_path, "labels.json"), "rb") as f: train_hash = hashlib.sha256(f.read()).hexdigest() diff --git a/references/recognition/evaluate_pytorch.py b/references/recognition/evaluate_pytorch.py index 1963b806f3..fee747759d 100644 --- a/references/recognition/evaluate_pytorch.py +++ b/references/recognition/evaluate_pytorch.py @@ -106,7 +106,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=ds.collate_fn, ) - print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)") + print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)") mean, std = model.cfg["mean"], model.cfg["std"] batch_transforms = Normalize(mean=mean, std=std) diff --git a/references/recognition/evaluate_tensorflow.py b/references/recognition/evaluate_tensorflow.py index b6ca50b516..6946236701 100644 --- a/references/recognition/evaluate_tensorflow.py +++ b/references/recognition/evaluate_tensorflow.py @@ -99,7 +99,7 @@ def main(args): drop_last=False, shuffle=False, ) - print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)") + print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)") mean, std = model.cfg["mean"], model.cfg["std"] batch_transforms = T.Normalize(mean=mean, std=std) diff --git a/references/recognition/train_pytorch.py b/references/recognition/train_pytorch.py index 9290b88c8e..608c5d4145 100644 --- a/references/recognition/train_pytorch.py +++ b/references/recognition/train_pytorch.py @@ -225,7 +225,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=val_set.collate_fn, ) - print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)") + print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)") batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)) @@ -326,7 +326,7 @@ def main(args): pin_memory=torch.cuda.is_available(), collate_fn=train_set.collate_fn, ) - print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)") + print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)") if args.show_samples: x, target = next(iter(train_loader)) diff --git a/references/recognition/train_pytorch_ddp.py b/references/recognition/train_pytorch_ddp.py index 416bab0cb3..e7d71a30bf 100644 --- a/references/recognition/train_pytorch_ddp.py +++ b/references/recognition/train_pytorch_ddp.py @@ -162,9 +162,7 @@ def main(rank: int, world_size: int, args): pin_memory=torch.cuda.is_available(), collate_fn=val_set.collate_fn, ) - print( - f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)" - ) + print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)") batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)) @@ -266,7 +264,7 @@ def main(rank: int, world_size: int, args): pin_memory=torch.cuda.is_available(), collate_fn=train_set.collate_fn, ) - print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)") + print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)") if rank == 0 and args.show_samples: x, target = next(iter(train_loader)) diff --git a/references/recognition/train_tensorflow.py b/references/recognition/train_tensorflow.py index 348f3a3869..8a6a9e1e01 100644 --- a/references/recognition/train_tensorflow.py +++ b/references/recognition/train_tensorflow.py @@ -181,8 +181,7 @@ def main(args): drop_last=False, ) print( - f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " - f"{val_loader.num_batches} batches)" + f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)" ) # Load doctr model @@ -274,8 +273,7 @@ def main(args): drop_last=True, ) print( - f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " - f"{train_loader.num_batches} batches)" + f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)" ) if args.show_samples: diff --git a/tests/pytorch/test_transforms_pt.py b/tests/pytorch/test_transforms_pt.py index 3c11412556..15e60c6f1c 100644 --- a/tests/pytorch/test_transforms_pt.py +++ b/tests/pytorch/test_transforms_pt.py @@ -40,8 +40,7 @@ def test_resize(): # Symetric padding transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) assert repr(transfo) == ( - f"Resize(output_size={output_size}, interpolation='bilinear', " - f"preserve_aspect_ratio=True, symmetric_pad=True)" + f"Resize(output_size={output_size}, interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)" ) out = transfo(input_t) assert out.shape[-2:] == output_size diff --git a/tests/tensorflow/test_transforms_tf.py b/tests/tensorflow/test_transforms_tf.py index 5fa87eab8a..a923d679ff 100644 --- a/tests/tensorflow/test_transforms_tf.py +++ b/tests/tensorflow/test_transforms_tf.py @@ -30,7 +30,7 @@ def test_resize(): # Symetric padding transfo = T.Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) assert repr(transfo) == ( - f"Resize(output_size={output_size}, method='bilinear', " f"preserve_aspect_ratio=True, symmetric_pad=True)" + f"Resize(output_size={output_size}, method='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)" ) out = transfo(input_t) # Asymetric padding