fix tf test and formatting (#1762)

mindee · Oct 25, 2024 · d0eff67
1 parent e525380
commit d0eff67
Show file tree

Hide file tree

Showing 23 changed files with 50 additions and 63 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: check-ast
       - id: check-yaml
@@ -16,7 +16,7 @@ repos:
       - id: no-commit-to-branch
         args: ['--branch', 'main']
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.2
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: [ --fix ]

diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py
@@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
             assert "labels" in target, "Target should contain 'labels' key"
         elif isinstance(target, tuple):
             assert len(target) == 2
-            assert isinstance(target[0], str) or isinstance(
-                target[0], np.ndarray
-            ), "first element of the tuple should be a string or a numpy array"
+            assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
+                "first element of the tuple should be a string or a numpy array"
+            )
             assert isinstance(target[1], list), "second element of the tuple should be a list"
         else:
-            assert isinstance(target, str) or isinstance(
-                target, np.ndarray
-            ), "Target should be a string or a numpy array"
+            assert isinstance(target, str) or isinstance(target, np.ndarray), (
+                "Target should be a string or a numpy array"
+            )
 
         # Read image
         img = (

diff --git a/doctr/datasets/datasets/tensorflow.py b/doctr/datasets/datasets/tensorflow.py
@@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
             assert "labels" in target, "Target should contain 'labels' key"
         elif isinstance(target, tuple):
             assert len(target) == 2
-            assert isinstance(target[0], str) or isinstance(
-                target[0], np.ndarray
-            ), "first element of the tuple should be a string or a numpy array"
+            assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
+                "first element of the tuple should be a string or a numpy array"
+            )
             assert isinstance(target[1], list), "second element of the tuple should be a list"
         else:
-            assert isinstance(target, str) or isinstance(
-                target, np.ndarray
-            ), "Target should be a string or a numpy array"
+            assert isinstance(target, str) or isinstance(target, np.ndarray), (
+                "Target should be a string or a numpy array"
+            )
 
         # Read image
         img = (

diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
@@ -103,9 +103,9 @@ def forward(
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
 
-        assert all(
-            len(loc_pred) == 1 for loc_pred in loc_preds
-        ), "Detection Model in ocr_predictor should output only one class"
+        assert all(len(loc_pred) == 1 for loc_pred in loc_preds), (
+            "Detection Model in ocr_predictor should output only one class"
+        )
 
         loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds]
         # Detach objectness scores from loc_preds

diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
@@ -103,9 +103,9 @@ def __call__(
             # forward again to get predictions on straight pages
             loc_preds_dict = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
-        assert all(
-            len(loc_pred) == 1 for loc_pred in loc_preds_dict
-        ), "Detection Model in ocr_predictor should output only one class"
+        assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), (
+            "Detection Model in ocr_predictor should output only one class"
+        )
         loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]  # type: ignore[union-attr]
         # Detach objectness scores from loc_preds
         loc_preds, objectness_scores = detach_scores(loc_preds)

diff --git a/doctr/transforms/functional/tensorflow.py b/doctr/transforms/functional/tensorflow.py
@@ -140,7 +140,7 @@ def rotate_sample(
     rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[1]
     rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[0]
 
-    return rotated_img, np.clip(rotated_geoms, 0, 1)
+    return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1)
 
 
 def crop_detection(

diff --git a/doctr/utils/data.py b/doctr/utils/data.py
@@ -112,7 +112,7 @@ def download_from_url(
     except (urllib.error.URLError, IOError) as e:
         if url[:5] == "https":
             url = url.replace("https:", "http:")
-            print("Failed download. Trying https -> http instead." f" Downloading {url} to {file_path}")
+            print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
             _urlretrieve(url, file_path)
         else:
             raise e

diff --git a/doctr/utils/metrics.py b/doctr/utils/metrics.py
@@ -392,7 +392,7 @@ def update(
         """
         if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels):
             raise AssertionError(
-                "there should be the same number of boxes and string both for the ground truth " "and the predictions"
+                "there should be the same number of boxes and string both for the ground truth and the predictions"
             )
 
         # Compute IoU
@@ -525,7 +525,7 @@ def update(
         """
         if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]:
             raise AssertionError(
-                "there should be the same number of boxes and string both for the ground truth " "and the predictions"
+                "there should be the same number of boxes and string both for the ground truth and the predictions"
             )
 
         # Compute IoU

diff --git a/references/classification/train_pytorch_character.py b/references/classification/train_pytorch_character.py
@@ -208,7 +208,7 @@ def main(args):
         sampler=SequentialSampler(val_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
+    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
 
     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))
 
@@ -272,7 +272,7 @@ def main(args):
         sampler=RandomSampler(train_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
+    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
 
     if args.show_samples:
         x, target = next(iter(train_loader))

diff --git a/references/classification/train_pytorch_orientation.py b/references/classification/train_pytorch_orientation.py
@@ -216,7 +216,7 @@ def main(args):
         sampler=SequentialSampler(val_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
+    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
 
     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))
 
@@ -278,7 +278,7 @@ def main(args):
         sampler=RandomSampler(train_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
+    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
 
     if args.show_samples:
         x, target = next(iter(train_loader))

diff --git a/references/classification/train_tensorflow_character.py b/references/classification/train_tensorflow_character.py
@@ -170,8 +170,7 @@ def main(args):
         collate_fn=collate_fn,
     )
     print(
-        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
-        f"{val_loader.num_batches} batches)"
+        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
     )
 
     # Load doctr model
@@ -226,8 +225,7 @@ def main(args):
         collate_fn=collate_fn,
     )
     print(
-        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
-        f"{train_loader.num_batches} batches)"
+        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
     )
 
     if args.show_samples:

diff --git a/references/classification/train_tensorflow_orientation.py b/references/classification/train_tensorflow_orientation.py
@@ -181,8 +181,7 @@ def main(args):
         collate_fn=collate_fn,
     )
     print(
-        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
-        f"{val_loader.num_batches} batches)"
+        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
     )
 
     # Load doctr model
@@ -236,8 +235,7 @@ def main(args):
         collate_fn=collate_fn,
     )
     print(
-        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
-        f"{train_loader.num_batches} batches)"
+        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
     )
 
     if args.show_samples:

diff --git a/references/detection/evaluate_pytorch.py b/references/detection/evaluate_pytorch.py
@@ -112,7 +112,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=ds.collate_fn,
     )
-    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
+    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")
 
     batch_transforms = Normalize(mean=mean, std=std)
 
@@ -143,8 +143,7 @@ def main(args):
     print("Running evaluation")
     val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric, amp=args.amp)
     print(
-        f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
-        f"Mean IoU: {mean_iou:.2%})"
+        f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
     )
 
 

diff --git a/references/detection/evaluate_tensorflow.py b/references/detection/evaluate_tensorflow.py
@@ -112,7 +112,7 @@ def main(args):
         drop_last=False,
         shuffle=False,
     )
-    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
+    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")
 
     batch_transforms = T.Normalize(mean=mean, std=std)
 
@@ -122,8 +122,7 @@ def main(args):
     print("Running evaluation")
     val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric)
     print(
-        f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
-        f"Mean IoU: {mean_iou:.2%})"
+        f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
     )
 
 

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
@@ -209,7 +209,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=val_set.collate_fn,
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
+    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
     with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
         val_hash = hashlib.sha256(f.read()).hexdigest()
 
@@ -316,7 +316,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=train_set.collate_fn,
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
+    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
     with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
         train_hash = hashlib.sha256(f.read()).hexdigest()
 

diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
@@ -173,8 +173,7 @@ def main(args):
         drop_last=False,
     )
     print(
-        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
-        f"{val_loader.num_batches} batches)"
+        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
     )
     with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
         val_hash = hashlib.sha256(f.read()).hexdigest()
@@ -269,8 +268,7 @@ def main(args):
         drop_last=True,
     )
     print(
-        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
-        f"{train_loader.num_batches} batches)"
+        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
     )
     with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
         train_hash = hashlib.sha256(f.read()).hexdigest()

diff --git a/references/recognition/evaluate_pytorch.py b/references/recognition/evaluate_pytorch.py
@@ -106,7 +106,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=ds.collate_fn,
     )
-    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
+    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")
 
     mean, std = model.cfg["mean"], model.cfg["std"]
     batch_transforms = Normalize(mean=mean, std=std)

diff --git a/references/recognition/evaluate_tensorflow.py b/references/recognition/evaluate_tensorflow.py
@@ -99,7 +99,7 @@ def main(args):
         drop_last=False,
         shuffle=False,
     )
-    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
+    print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")
 
     mean, std = model.cfg["mean"], model.cfg["std"]
     batch_transforms = T.Normalize(mean=mean, std=std)

diff --git a/references/recognition/train_pytorch.py b/references/recognition/train_pytorch.py
@@ -225,7 +225,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=val_set.collate_fn,
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
+    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
 
     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))
 
@@ -326,7 +326,7 @@ def main(args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=train_set.collate_fn,
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
+    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
 
     if args.show_samples:
         x, target = next(iter(train_loader))

diff --git a/references/recognition/train_pytorch_ddp.py b/references/recognition/train_pytorch_ddp.py
@@ -162,9 +162,7 @@ def main(rank: int, world_size: int, args):
             pin_memory=torch.cuda.is_available(),
             collate_fn=val_set.collate_fn,
         )
-        print(
-            f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)"
-        )
+        print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
 
     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))
 
@@ -266,7 +264,7 @@ def main(rank: int, world_size: int, args):
         pin_memory=torch.cuda.is_available(),
         collate_fn=train_set.collate_fn,
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
+    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
 
     if rank == 0 and args.show_samples:
         x, target = next(iter(train_loader))

diff --git a/references/recognition/train_tensorflow.py b/references/recognition/train_tensorflow.py
@@ -181,8 +181,7 @@ def main(args):
         drop_last=False,
     )
     print(
-        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
-        f"{val_loader.num_batches} batches)"
+        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
     )
 
     # Load doctr model
@@ -274,8 +273,7 @@ def main(args):
         drop_last=True,
     )
     print(
-        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
-        f"{train_loader.num_batches} batches)"
+        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
     )
 
     if args.show_samples:

diff --git a/tests/pytorch/test_transforms_pt.py b/tests/pytorch/test_transforms_pt.py
@@ -40,8 +40,7 @@ def test_resize():
     # Symetric padding
     transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
     assert repr(transfo) == (
-        f"Resize(output_size={output_size}, interpolation='bilinear', "
-        f"preserve_aspect_ratio=True, symmetric_pad=True)"
+        f"Resize(output_size={output_size}, interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
     )
     out = transfo(input_t)
     assert out.shape[-2:] == output_size

diff --git a/tests/tensorflow/test_transforms_tf.py b/tests/tensorflow/test_transforms_tf.py
@@ -30,7 +30,7 @@ def test_resize():
     # Symetric padding
     transfo = T.Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
     assert repr(transfo) == (
-        f"Resize(output_size={output_size}, method='bilinear', " f"preserve_aspect_ratio=True, symmetric_pad=True)"
+        f"Resize(output_size={output_size}, method='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
     )
     out = transfo(input_t)
     # Asymetric padding