Skip to content

Commit

Permalink
fix tf test and formatting (#1762)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 authored Oct 25, 2024
1 parent e525380 commit d0eff67
Show file tree
Hide file tree
Showing 23 changed files with 50 additions and 63 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: check-ast
- id: check-yaml
Expand All @@ -16,7 +16,7 @@ repos:
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
rev: v0.7.1
hooks:
- id: ruff
args: [ --fix ]
Expand Down
12 changes: 6 additions & 6 deletions doctr/datasets/datasets/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
assert "labels" in target, "Target should contain 'labels' key"
elif isinstance(target, tuple):
assert len(target) == 2
assert isinstance(target[0], str) or isinstance(
target[0], np.ndarray
), "first element of the tuple should be a string or a numpy array"
assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
"first element of the tuple should be a string or a numpy array"
)
assert isinstance(target[1], list), "second element of the tuple should be a list"
else:
assert isinstance(target, str) or isinstance(
target, np.ndarray
), "Target should be a string or a numpy array"
assert isinstance(target, str) or isinstance(target, np.ndarray), (
"Target should be a string or a numpy array"
)

# Read image
img = (
Expand Down
12 changes: 6 additions & 6 deletions doctr/datasets/datasets/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
assert "labels" in target, "Target should contain 'labels' key"
elif isinstance(target, tuple):
assert len(target) == 2
assert isinstance(target[0], str) or isinstance(
target[0], np.ndarray
), "first element of the tuple should be a string or a numpy array"
assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
"first element of the tuple should be a string or a numpy array"
)
assert isinstance(target[1], list), "second element of the tuple should be a list"
else:
assert isinstance(target, str) or isinstance(
target, np.ndarray
), "Target should be a string or a numpy array"
assert isinstance(target, str) or isinstance(target, np.ndarray), (
"Target should be a string or a numpy array"
)

# Read image
img = (
Expand Down
6 changes: 3 additions & 3 deletions doctr/models/predictor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ def forward(
# Forward again to get predictions on straight pages
loc_preds = self.det_predictor(pages, **kwargs)

assert all(
len(loc_pred) == 1 for loc_pred in loc_preds
), "Detection Model in ocr_predictor should output only one class"
assert all(len(loc_pred) == 1 for loc_pred in loc_preds), (
"Detection Model in ocr_predictor should output only one class"
)

loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds]
# Detach objectness scores from loc_preds
Expand Down
6 changes: 3 additions & 3 deletions doctr/models/predictor/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ def __call__(
# forward again to get predictions on straight pages
loc_preds_dict = self.det_predictor(pages, **kwargs) # type: ignore[assignment]

assert all(
len(loc_pred) == 1 for loc_pred in loc_preds_dict
), "Detection Model in ocr_predictor should output only one class"
assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), (
"Detection Model in ocr_predictor should output only one class"
)
loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] # type: ignore[union-attr]
# Detach objectness scores from loc_preds
loc_preds, objectness_scores = detach_scores(loc_preds)
Expand Down
2 changes: 1 addition & 1 deletion doctr/transforms/functional/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def rotate_sample(
rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[1]
rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[0]

return rotated_img, np.clip(rotated_geoms, 0, 1)
return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1)


def crop_detection(
Expand Down
2 changes: 1 addition & 1 deletion doctr/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def download_from_url(
except (urllib.error.URLError, IOError) as e:
if url[:5] == "https":
url = url.replace("https:", "http:")
print("Failed download. Trying https -> http instead." f" Downloading {url} to {file_path}")
print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
_urlretrieve(url, file_path)
else:
raise e
Expand Down
4 changes: 2 additions & 2 deletions doctr/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def update(
"""
if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels):
raise AssertionError(
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
"there should be the same number of boxes and string both for the ground truth and the predictions"
)

# Compute IoU
Expand Down Expand Up @@ -525,7 +525,7 @@ def update(
"""
if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]:
raise AssertionError(
"there should be the same number of boxes and string both for the ground truth " "and the predictions"
"there should be the same number of boxes and string both for the ground truth and the predictions"
)

# Compute IoU
Expand Down
4 changes: 2 additions & 2 deletions references/classification/train_pytorch_character.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def main(args):
sampler=SequentialSampler(val_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -272,7 +272,7 @@ def main(args):
sampler=RandomSampler(train_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
4 changes: 2 additions & 2 deletions references/classification/train_pytorch_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def main(args):
sampler=SequentialSampler(val_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -278,7 +278,7 @@ def main(args):
sampler=RandomSampler(train_set),
pin_memory=torch.cuda.is_available(),
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/classification/train_tensorflow_character.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -226,8 +225,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
6 changes: 2 additions & 4 deletions references/classification/train_tensorflow_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -236,8 +235,7 @@ def main(args):
collate_fn=collate_fn,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
5 changes: 2 additions & 3 deletions references/detection/evaluate_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=ds.collate_fn,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

batch_transforms = Normalize(mean=mean, std=std)

Expand Down Expand Up @@ -143,8 +143,7 @@ def main(args):
print("Running evaluation")
val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric, amp=args.amp)
print(
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
f"Mean IoU: {mean_iou:.2%})"
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
)


Expand Down
5 changes: 2 additions & 3 deletions references/detection/evaluate_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def main(args):
drop_last=False,
shuffle=False,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

batch_transforms = T.Normalize(mean=mean, std=std)

Expand All @@ -122,8 +122,7 @@ def main(args):
print("Running evaluation")
val_loss, recall, precision, mean_iou = evaluate(model, test_loader, batch_transforms, metric)
print(
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
f"Mean IoU: {mean_iou:.2%})"
f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
)


Expand Down
4 changes: 2 additions & 2 deletions references/detection/train_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
val_hash = hashlib.sha256(f.read()).hexdigest()

Expand Down Expand Up @@ -316,7 +316,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
train_hash = hashlib.sha256(f.read()).hexdigest()

Expand Down
6 changes: 2 additions & 4 deletions references/detection/train_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,7 @@ def main(args):
drop_last=False,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)
with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
val_hash = hashlib.sha256(f.read()).hexdigest()
Expand Down Expand Up @@ -269,8 +268,7 @@ def main(args):
drop_last=True,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)
with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
train_hash = hashlib.sha256(f.read()).hexdigest()
Expand Down
2 changes: 1 addition & 1 deletion references/recognition/evaluate_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=ds.collate_fn,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

mean, std = model.cfg["mean"], model.cfg["std"]
batch_transforms = Normalize(mean=mean, std=std)
Expand Down
2 changes: 1 addition & 1 deletion references/recognition/evaluate_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def main(args):
drop_last=False,
shuffle=False,
)
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in " f"{len(test_loader)} batches)")
print(f"Test set loaded in {time.time() - st:.4}s ({len(ds)} samples in {len(test_loader)} batches)")

mean, std = model.cfg["mean"], model.cfg["std"]
batch_transforms = T.Normalize(mean=mean, std=std)
Expand Down
4 changes: 2 additions & 2 deletions references/recognition/train_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -326,7 +326,7 @@ def main(args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/recognition/train_pytorch_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,7 @@ def main(rank: int, world_size: int, args):
pin_memory=torch.cuda.is_available(),
collate_fn=val_set.collate_fn,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)"
)
print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

Expand Down Expand Up @@ -266,7 +264,7 @@ def main(rank: int, world_size: int, args):
pin_memory=torch.cuda.is_available(),
collate_fn=train_set.collate_fn,
)
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")
print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

if rank == 0 and args.show_samples:
x, target = next(iter(train_loader))
Expand Down
6 changes: 2 additions & 4 deletions references/recognition/train_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ def main(args):
drop_last=False,
)
print(
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
f"{val_loader.num_batches} batches)"
f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
)

# Load doctr model
Expand Down Expand Up @@ -274,8 +273,7 @@ def main(args):
drop_last=True,
)
print(
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
f"{train_loader.num_batches} batches)"
f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
)

if args.show_samples:
Expand Down
3 changes: 1 addition & 2 deletions tests/pytorch/test_transforms_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def test_resize():
# Symmetric padding
transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
assert repr(transfo) == (
f"Resize(output_size={output_size}, interpolation='bilinear', "
f"preserve_aspect_ratio=True, symmetric_pad=True)"
f"Resize(output_size={output_size}, interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
)
out = transfo(input_t)
assert out.shape[-2:] == output_size
Expand Down
2 changes: 1 addition & 1 deletion tests/tensorflow/test_transforms_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_resize():
# Symmetric padding
transfo = T.Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
assert repr(transfo) == (
f"Resize(output_size={output_size}, method='bilinear', " f"preserve_aspect_ratio=True, symmetric_pad=True)"
f"Resize(output_size={output_size}, method='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
)
out = transfo(input_t)
# Asymmetric padding
Expand Down

0 comments on commit d0eff67

Please sign in to comment.