Skip to content

Commit

Permalink
Fix and run linting (#690)
Browse files Browse the repository at this point in the history
* Add linting dependencies in pyproject.toml

* Add lint session to noxfile.py

* Run isort

* Run black, line length 79

* flake8 fix: line break in comment in src/scripts/download_autoannotate_data.py

* flake8 fix: add imports from __about__ to __all__ in vak/__init__

* flake8 fixes in common/annotation.py

- remove dict type hinting that I think was not valid anyways
- use isinstance instead of type == numpy.ndarray
- fix an incorrect variable name in an error message

* flake8 fixes in transforms/transforms.py

- Use isinstance checks instead of type ==
- Add missing values in error message (printed with f-strings
  to help user find root of issue)

* flake8 fixes in transforms/functional.py -- use isinstance, add missing values in fstring for error message

* flake8 fixes in frame_labels/functional.py -- use isinstance checks, whitespace before colon

* flake8 fixes in frame_labels/__init__.py, noqa on 'unused' imports

* flake8 fixes in transforms/__init__.py -- noqa on 'unused' imports and use of '*'

* flake8 fixes in train/frame_classification.py -- remove unused import, remove unused f-string

* flake8 fix in unit_dataset/unit_dataset.py: whitespace before colon

* make flake8 fixes in train/__init__.py -- fix imports, add __all__

* Add __all__ in prep/unit_dataset/__init__.py

* Add __all__ in prep/split/algorithms/__init__.py

* Add __all__ in prep/split/__init__.py

* Use isinstance not type == in spectrogram_dataset/spect_helper.py

* Add __all__ in prep/spectrogram_dataset/__init__.py

* Remove unused f-string, fix line length in prep/parametric_umap/parametric_umap.py

* Add __all__ in prep/parametric_umap/__init__.py

* Comment out section in predict/parametric_umap.py with undefined names for now

* Remove unused f-string prefixes in prep/frame_classification/frame_classification.py

* Add parametric_umap to __all__ in prep/__init__.py

* Remove unneeded code for annotations in predict/parametric_umap.py

* Remove unused f-string prefix in predict/frame_classification.py

* Fix imports and __all__ in predict/__init__.py

* Add 'annotation' to __all__ in plot/__init__.py

* turn on/off isort and squelch flake8 warnings around umap warnings filter in loss/umap.py

* Fix imports and __all__ in nn/loss/__init__.py

* squelch flake8 warnings in nn/__init__.py

* Fix f-string and remove incorrect dict type annotations in models/frame_classification_model.py

* Remove unused import in frame_classification_model.py

* fix type annotations in models/definition.py

* Fix type annotations in models/base.py

* Use isinstance not 'type ==' and remove unused f-string prefix in distance/functional.py

* Fix imports in distance/__init__.py

* Fix imports and __all__ in metrics/classification/__init__.py

* squelch flake8 warnings in metrics/__init__.py

* Remove unused f-string prefix and unused import in learncurve/frame_classification.py

* define inline functions instead of assigned lambdas, remove unused variables in learncurve/curvefit.py

* Remove undefined name in eval/parametric_umap.py

* Remove unused f-string prefix in eval/frame_classification.py

* Fix imports, add __all__ in eval/__init__.py

* Check if random_state is None not == None in parametric_umap.py

* Fix whitespace in frame_classification/window_dataset.py

* Fix 'not value in' -> 'value not in' in frame_classification/metadata.py

* Fix __all__ in datasets/__init__.py

* Remove unused f-string prefixes in config/prep.py

* Add __all__ in config/__init__.py

* Fix docstring for common.timenow.get_timenow_as_str -- remove f-string prefix

* Use isinstance not 'type ==' in common/labels.py

* Squelch flake8 warning around regex pattern in common/files.py

* Add __all__ in common/files/__init__.py

* Use isinstance not 'type ==' in common/converters.py function bool_from_str

* Use rawstring in datasets/parametric_umap/parametric_umap.py to avoid triggering invalid escape sequence warning from flake8
  • Loading branch information
NickleDave authored Aug 14, 2023
1 parent a9f9d16 commit a96ff97
Show file tree
Hide file tree
Showing 134 changed files with 2,446 additions and 1,760 deletions.
12 changes: 12 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ def dev(session: nox.Session) -> None:
session.run(python, "-m", "pip", "install", "-e", ".[dev,test,doc]", external=True)


@nox.session(python="3.10")
def lint(session):
    """Run the linters over ``./src``.

    Installs the project's ``dev`` extras, then runs each lint tool
    in turn. Order matters: isort runs before black because the two
    disagree about import formatting; flake8 runs last as a pure checker.
    """
    session.install(".[dev]")
    # isort must run first since black disagrees with it
    lint_commands = (
        ("isort", "./src"),
        ("black", "./src", "--line-length=79"),
        (
            "flake8",
            "./src",
            "--max-line-length",
            "120",
            "--exclude",
            "./src/crowsetta/_vendor",
        ),
    )
    for command in lint_commands:
        session.run(*command)


# ---- used by sessions that "clean up" data for tests
def clean_dir(dir_path):
"""
Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ dependencies = [
[project.optional-dependencies]
dev = [
"twine >=3.3.0",
"black >=20.8b1",
"ipython >=7.0"
"black >=23.7.0",
"flake8 >=6.0.0",
"ipython >=7.0",
"isort >=5.12.0",
"pycln >=2.1.3",
]
test = [
"pytest >=6.2.1",
Expand Down
62 changes: 29 additions & 33 deletions src/scripts/download_autoannotate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
https://github.com/NickleDave/bfsongrepo/blob/main/src/scripts/download_dataset.py
"""
from __future__ import annotations

import argparse
import pathlib
import shutil
Expand All @@ -12,17 +13,16 @@
import urllib.request
import warnings


DATA_TO_DOWNLOAD = {
"gy6or6": {
"sober.repo1.gy6or6.032212.wav.csv.tar.gz": {
"MD5": "8c88b46ba87f9784d3690cc8ee4bf2f4",
"download": "https://figshare.com/ndownloader/files/37509160"
"download": "https://figshare.com/ndownloader/files/37509160",
},
"sober.repo1.gy6or6.032312.wav.csv.tar.gz": {
"MD5": "063ba4d50d1b94009b4b00f0a941d098",
"download": "https://figshare.com/ndownloader/files/37509172"
}
"download": "https://figshare.com/ndownloader/files/37509172",
},
}
}

Expand All @@ -39,44 +39,40 @@ def reporthook(count: int, block_size: int, total_size: int) -> None:
progress_size = int(count * block_size)
speed = int(progress_size / (1024 * duration))
percent = int(count * block_size * 100 / total_size)
sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
(percent, progress_size / (1024 * 1024), speed, duration))
sys.stdout.write(
"\r...%d%%, %d MB, %d KB/s, %d seconds passed"
% (percent, progress_size / (1024 * 1024), speed, duration)
)
sys.stdout.flush()


def download_dataset(download_urls_by_bird_ID: dict,
bfsongrepo_dir: pathlib.Path) -> None:
def download_dataset(
download_urls_by_bird_ID: dict, bfsongrepo_dir: pathlib.Path
) -> None:
"""download the dataset, given a dict of download urls"""
tar_dir = bfsongrepo_dir / "tars"
tar_dir.mkdir()
# top-level keys are bird ID: bl26lb16, gr41rd51, ...
for bird_id, tars_dict in download_urls_by_bird_ID.items():
print(
f'Downloading .tar files for bird: {bird_id}'
)
# bird ID -> dict where keys are .tar.gz filenames mapping to download url + MD5 hash
print(f"Downloading .tar files for bird: {bird_id}")
# bird ID -> dict
# where keys are .tar.gz filenames mapping to download url + MD5 hash
for tar_name, url_md5_dict in tars_dict.items():
print(
f'Downloading tar: {tar_name}'
)
download_url = url_md5_dict['download']
print(f"Downloading tar: {tar_name}")
download_url = url_md5_dict["download"]
filename = tar_dir / tar_name
urllib.request.urlretrieve(download_url, filename, reporthook)
print('\n')
print("\n")


def extract_tars(bfsongrepo_dir: pathlib.Path) -> None:
tar_dir = bfsongrepo_dir / "tars" # made by download_dataset function
tars = sorted(tar_dir.glob('*.tar.gz'))
tars = sorted(tar_dir.glob("*.tar.gz"))
for tar_path in tars:
print(
f"\nunpacking: {tar_path}"
)
print(f"\nunpacking: {tar_path}")

shutil.unpack_archive(
filename=tar_path,
extract_dir=bfsongrepo_dir,
format="gztar"
filename=tar_path, extract_dir=bfsongrepo_dir, format="gztar"
)


Expand All @@ -87,7 +83,7 @@ def main(dst: str | pathlib.Path) -> None:
raise NotADirectoryError(
f"Value for 'dst' argument not recognized as a directory: {dst}"
)
bfsongrepo_dir = dst / 'bfsongrepo'
bfsongrepo_dir = dst / "bfsongrepo"
if bfsongrepo_dir.exists():
warnings.warn(
f"Directory already exists: {bfsongrepo_dir}\n"
Expand All @@ -103,9 +99,7 @@ def main(dst: str | pathlib.Path) -> None:
"If that fails, please download files for tutorial manually from the 'download' links in tutorial page."
) from e

print(
f'Downloading Bengalese Finch Song Repository to: {bfsongrepo_dir}'
)
print(f"Downloading Bengalese Finch Song Repository to: {bfsongrepo_dir}")

download_dataset(DATA_TO_DOWNLOAD, bfsongrepo_dir)
extract_tars(bfsongrepo_dir)
Expand All @@ -115,11 +109,13 @@ def get_parser() -> argparse.ArgumentParser:
"""get ArgumentParser used to parse command-line arguments"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--dst',
default='.',
help=("Destination where dataset should be downloaded. "
"Default is '.', i.e., current working directory "
"from which this script is run.'")
"--dst",
default=".",
help=(
"Destination where dataset should be downloaded. "
"Default is '.', i.e., current working directory "
"from which this script is run.'"
),
)
return parser

Expand Down
4 changes: 3 additions & 1 deletion src/vak/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@


__title__ = "vak"
__summary__ = "a neural network toolbox for animal vocalizations and bioacoustics"
__summary__ = (
"a neural network toolbox for animal vocalizations and bioacoustics"
)
__uri__ = "https://github.com/NickleDave/vak"

__version__ = "1.0.0a1"
Expand Down
35 changes: 22 additions & 13 deletions src/vak/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,3 @@
from .__about__ import (
__author__,
__commit__,
__copyright__,
__email__,
__license__,
__summary__,
__title__,
__uri__,
__version__,
)

from . import (
__main__,
cli,
Expand All @@ -28,17 +16,38 @@
train,
transforms,
)

from .__about__ import (
__author__,
__commit__,
__copyright__,
__email__,
__license__,
__summary__,
__title__,
__uri__,
__version__,
)

__all__ = [
"__main__",
"__author__",
"__commit__",
"__copyright__",
"__email__",
"__license__",
"__summary__",
"__title__",
"__uri__",
"__version__",
"cli",
"common",
"config",
"datasets",
"eval",
"learncurve",
"metrics",
"models",
"nets",
"nn",
"plot",
"predict",
Expand Down
2 changes: 1 addition & 1 deletion src/vak/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def get_parser():
"""returns ArgumentParser instance used by main()"""
parser = argparse.ArgumentParser(
prog='vak',
prog="vak",
description="vak command-line interface",
formatter_class=argparse.RawTextHelpFormatter,
)
Expand Down
1 change: 0 additions & 1 deletion src/vak/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from . import cli, eval, learncurve, predict, prep, train


__all__ = [
"cli",
"eval",
Expand Down
5 changes: 5 additions & 0 deletions src/vak/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
def eval(toml_path):
    """CLI entry point for the ``eval`` command.

    Imports :mod:`.eval` lazily at call time — presumably to keep CLI
    start-up cheap and avoid import cycles (TODO confirm) — then
    delegates to its ``eval`` function.
    """
    # alias avoids shadowing this function's own name in the local scope
    from .eval import eval as eval_

    eval_(toml_path=toml_path)


def train(toml_path):
    """CLI entry point for the ``train`` command.

    Imports :mod:`.train` lazily at call time — presumably to keep CLI
    start-up cheap and avoid import cycles (TODO confirm) — then
    delegates to its ``train`` function.
    """
    # alias avoids shadowing this function's own name in the local scope
    from .train import train as train_

    train_(toml_path=toml_path)


def learncurve(toml_path):
    """CLI entry point for the ``learncurve`` command.

    Imports :mod:`.learncurve` lazily at call time — presumably to keep
    CLI start-up cheap and avoid import cycles (TODO confirm) — then
    delegates to its ``learning_curve`` function.
    """
    from .learncurve import learning_curve as run_learning_curve

    run_learning_curve(toml_path=toml_path)


def predict(toml_path):
    """CLI entry point for the ``predict`` command.

    Imports :mod:`.predict` lazily at call time — presumably to keep CLI
    start-up cheap and avoid import cycles (TODO confirm) — then
    delegates to its ``predict`` function.
    """
    # alias avoids shadowing this function's own name in the local scope
    from .predict import predict as predict_

    predict_(toml_path=toml_path)


def prep(toml_path):
    """CLI entry point for the ``prep`` command.

    Imports :mod:`.prep` lazily at call time — presumably to keep CLI
    start-up cheap and avoid import cycles (TODO confirm) — then
    delegates to its ``prep`` function.
    """
    # alias avoids shadowing this function's own name in the local scope
    from .prep import prep as prep_

    prep_(toml_path=prep_ and toml_path)


Expand Down
6 changes: 1 addition & 5 deletions src/vak/cli/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from .. import eval as eval_module
from ..common.logging import config_logging_for_cli, log_version


logger = logging.getLogger(__name__)


Expand All @@ -32,10 +31,7 @@ def eval(toml_path):

# ---- set up logging ---------------------------------------------------------------------------------------------
config_logging_for_cli(
log_dst=cfg.eval.output_dir,
log_stem="eval",
level="INFO",
force=True
log_dst=cfg.eval.output_dir, log_stem="eval", level="INFO", force=True
)
log_version(logger)

Expand Down
12 changes: 5 additions & 7 deletions src/vak/cli/learncurve.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging
from pathlib import Path
import shutil
from pathlib import Path

from .. import config, learncurve
from ..common.logging import config_logging_for_cli, log_version
from ..common.paths import generate_results_dir_name_as_path


logger = logging.getLogger(__name__)


Expand All @@ -32,17 +31,16 @@ def learning_curve(toml_path):
)

# ---- set up directory to save output -----------------------------------------------------------------------------
results_path = generate_results_dir_name_as_path(cfg.learncurve.root_results_dir)
results_path = generate_results_dir_name_as_path(
cfg.learncurve.root_results_dir
)
results_path.mkdir(parents=True)
# copy config file into results dir now that we've made the dir
shutil.copy(toml_path, results_path)

# ---- set up logging ----------------------------------------------------------------------------------------------
config_logging_for_cli(
log_dst=results_path,
log_stem="learncurve",
level="INFO",
force=True
log_dst=results_path, log_stem="learncurve", level="INFO", force=True
)
log_version(logger)
logger.info("Logging results to {}".format(results_path))
Expand Down
3 changes: 1 addition & 2 deletions src/vak/cli/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from .. import predict as predict_module
from ..common.logging import config_logging_for_cli, log_version


logger = logging.getLogger(__name__)


Expand All @@ -31,7 +30,7 @@ def predict(toml_path):
log_dst=cfg.predict.output_dir,
log_stem="predict",
level="INFO",
force=True
force=True,
)
log_version(logger)
logger.info("Logging results to {}".format(cfg.prep.output_dir))
Expand Down
6 changes: 4 additions & 2 deletions src/vak/cli/prep.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# note NO LOGGING -- we configure logger inside `core.prep`
# so we can save log file inside dataset directory
from pathlib import Path
import shutil
import warnings
from pathlib import Path

import toml

Expand Down Expand Up @@ -92,7 +92,9 @@ def prep(toml_path):
)

# now that we've checked that, go ahead and parse the sections we want
cfg = config.parse.from_toml_path(toml_path, sections=SECTIONS_PREP_SHOULD_PARSE)
cfg = config.parse.from_toml_path(
toml_path, sections=SECTIONS_PREP_SHOULD_PARSE
)
# notice we ignore any other option/values in the 'purpose' section,
# see https://github.com/NickleDave/vak/issues/334 and https://github.com/NickleDave/vak/issues/314
if cfg.prep is None:
Expand Down
Loading

0 comments on commit a96ff97

Please sign in to comment.