
Added FUSION metrics #70

Merged: 43 commits, Sep 16, 2024

Commits
56d5a13
in progress: adding new metrics
bmcgaughey1 Feb 9, 2024
7001c93
in progress: fixing syntax error
bmcgaughey1 Feb 9, 2024
0d7403c
added base FUSION metrics
bmcgaughey1 Feb 9, 2024
6c0a362
comments
bmcgaughey1 Feb 9, 2024
bbdbbe5
added percentiles as separate functions
bmcgaughey1 Feb 12, 2024
97beefa
Corrected syntax metrics.py line 209
bmcgaughey1 Feb 12, 2024
43cd866
Added L-moment metrics and profile area
bmcgaughey1 Feb 13, 2024
7e25d70
Fixing a few metrics, removing errant breakpoint, adding test for met…
kylemann16 Feb 15, 2024
8afcf4e
fixing metric test, removing mode method for now
kylemann16 Feb 15, 2024
55e7f35
Merge pull request #71 from hobuinc/kmann/metrics
kylemann16 Feb 23, 2024
be8d7f9
Fixing divide by zero errors in metrics
bmcgaughey1 Mar 5, 2024
74697d3
Added simple count and check on number of points for skewness and kur…
bmcgaughey1 Mar 5, 2024
f77ff71
added htthreshhold and coverthreshold to initialize cli
bmcgaughey1 Mar 6, 2024
ca522f4
continuing with adding thresholds to metric functions
bmcgaughey1 Mar 6, 2024
064fbdb
fixing __call__
bmcgaughey1 Mar 6, 2024
0cabd0e
fixing m_mode call in m_abovemode
bmcgaughey1 Mar 6, 2024
598b641
fixing m_mode call in m_madmode
bmcgaughey1 Mar 6, 2024
32ccaf6
skipping m_madmode
bmcgaughey1 Mar 6, 2024
e54cc2d
omitting m_madmode
bmcgaughey1 Mar 6, 2024
a6c4e1f
adding use of htthreshold
bmcgaughey1 Mar 7, 2024
77d6dcb
fixing m_mode
bmcgaughey1 Mar 7, 2024
2455dac
fixing m_mode again
bmcgaughey1 Mar 7, 2024
65d119f
m_mode again
bmcgaughey1 Mar 7, 2024
ba3a69b
m_mode again
bmcgaughey1 Mar 7, 2024
a57f7bb
m_mode
bmcgaughey1 Mar 7, 2024
c88230e
dropping m_mode (temp)
bmcgaughey1 Mar 7, 2024
538a8f2
fixing typos
bmcgaughey1 Mar 7, 2024
1629cd4
backing out application of ht and cover thresholds from metrics funct…
bmcgaughey1 Mar 7, 2024
04c4e5d
exploring ht filtering
bmcgaughey1 Mar 7, 2024
0978d21
removed debug printing
bmcgaughey1 Mar 11, 2024
8e6ef2b
added example docstrings for m_mean and m_mode
bmcgaughey1 Mar 25, 2024
ecd0395
Added constants for NODATA and height thresholds
bmcgaughey1 Apr 2, 2024
8a29787
Backed out use of htthreshold and coverthreshold
bmcgaughey1 Apr 2, 2024
dd6053e
Added docstrings to all metrics functions
bmcgaughey1 Apr 2, 2024
9a6240b
Kmann/metrics (#91)
kylemann16 Sep 16, 2024
4d3cfc9
Merge remote-tracking branch 'origin/main' into metric_merge
kylemann16 Sep 16, 2024
9f0e2d5
fix merge mistake
kylemann16 Sep 16, 2024
87cdbb8
fixing merge mistake
kylemann16 Sep 16, 2024
e3e11e2
Merge remote-tracking branch 'origin/main' into metric_merge
kylemann16 Sep 16, 2024
91d300f
Revert "fixing merge mistake"
kylemann16 Sep 16, 2024
6594300
Revert "fix merge mistake"
kylemann16 Sep 16, 2024
0239aad
Revert "Merge remote-tracking branch 'origin/main' into metric_merge"
kylemann16 Sep 16, 2024
6bdf198
Merge branch 'metric_merge' into metrics_revert
kylemann16 Sep 16, 2024
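Several of the commits above (divide-by-zero fixes, point-count checks for skewness and kurtosis) guard metrics that are undefined on small or constant cells. A minimal sketch of that kind of guard — the `NODATA` constant and helper name are illustrative, not the PR's actual code:

```python
import numpy as np

NODATA = -9999.0  # hypothetical stand-in for the PR's NODATA constant

def safe_skewness(pts: np.ndarray) -> float:
    """Return the sample skewness, or NODATA when it is undefined."""
    n = pts.size
    if n < 3:            # sample skewness needs at least 3 points
        return NODATA
    sd = pts.std(ddof=1)
    if sd == 0:          # constant cell: avoid divide by zero
        return NODATA
    m3 = np.mean((pts - pts.mean()) ** 3)   # third central moment
    return float(n * n / ((n - 1) * (n - 2)) * m3 / sd ** 3)
```

Kurtosis would get the same treatment with a minimum of 4 points.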
4 changes: 4 additions & 0 deletions .gitignore
@@ -21,5 +21,9 @@ stats/
**/tifs_test/**
autzen-classified.copc.laz

#sample metrics
metrics/
metrics_aligned/
autzen-aligned.tdb/

.DS_Store
15 changes: 8 additions & 7 deletions docs/source/api/resources/entry.rst
@@ -1,14 +1,15 @@
Entry
----------------------------------

.. autoclass:: silvimetric.resources.entry.Entry

Attribute
----------------------------------

.. autoclass:: silvimetric.resources.entry.Attribute
.. automodule:: silvimetric.resources.attribute
:members:
:undoc-members:
:show-inheritance:

Metric
----------------------------------

.. autoclass:: silvimetric.resources.metric.Metric
.. automodule:: silvimetric.resources.metric
:members:
:undoc-members:
:show-inheritance:
3 changes: 0 additions & 3 deletions docs/source/conf.py
@@ -37,7 +37,6 @@
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
html_theme = "sphinx_rtd_theme"
html_static_path = ['_static']
html_context = {
@@ -49,8 +48,6 @@
'conf_py_path': '/docs/source/'
}

if os.environ.get("READTHEDOCS", "") == "True":
html_context["READTHEDOCS"] = True


def read_version(filename):
4 changes: 1 addition & 3 deletions docs/source/index.rst
@@ -12,9 +12,7 @@ SilviMetric is an open source library and set of utilities from
data into raster and raster-like products.

Find out more about SilviMetric by visiting :ref:`about`. A slide deck about
SilviMetric is also available on `Google Slides <https://docs.google.com/presentation/d/1E561EgWwLgN5R9P0LBxuI1r7kG155u8E6-MOWpkycSM/edit?usp=sharing>`__,
and examples are available for viewing in `Google Colab <https://colab.research.google.com/drive/1u3Qdq3Fdy2du36WG823rKVlQtBL8eK0g#scrollTo=kY0ikB6JQ2G7>`__.

SilviMetric is also available on `Google Slides <https://docs.google.com/presentation/d/1E561EgWwLgN5R9P0LBxuI1r7kG155u8E6-MOWpkycSM/edit?usp=sharing>`__.

.. toctree::
:caption: Contents
5 changes: 2 additions & 3 deletions docs/source/tutorial.rst
@@ -189,7 +189,7 @@ Example:
$ METRIC_PATH="./path/to/python_metrics.py"
$ silvimetric --database $DB_NAME initialize --bounds "$BOUNDS" \
--crs "EPSG:$EPSG" \
-m $METRIC_PATH -m min -m max -m mean
-m "${METRIC_PATH},min,max,mean"

.. warning::

@@ -259,8 +259,7 @@ SilviMetric will take all the previously defined variables like the bounds,
resolution, and our tile size, and it will split all data values up into their
respective bins. From here, SilviMetric will perform each `Metric` previously
defined in :ref:`initialize` over the data in each cell. At the end of all that,
this data will be written to a `SparseArray` in `TileDB`, where it will be much
easier to access.
this data will be written to a `SparseArray` in `TileDB`, where it will be much easier to access.

Usage:

3 changes: 2 additions & 1 deletion environment.yml
@@ -17,4 +17,5 @@ dependencies:
- websocket-client
- python-json-logger
- dill
- pandas
- pandas
- lmoments3
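The new `lmoments3` dependency supports the L-moment metrics added earlier in this PR. As a rough illustration of what an L-moment computation involves — plain NumPy over probability-weighted moments, not the `lmoments3` API the code actually uses:

```python
import numpy as np

def first_two_l_moments(x) -> tuple:
    """First two sample L-moments from probability-weighted moments.

    Needs at least 2 points; l1 is the mean, l2 is a robust scale measure.
    """
    x = np.sort(np.asarray(x, dtype=float))
    n = x.size
    i = np.arange(1, n + 1)
    b0 = x.mean()
    b1 = np.sum((i - 1) / (n - 1) * x) / n  # first probability-weighted moment
    l1 = b0
    l2 = 2 * b1 - b0
    return l1, l2
```

For `[1, 2, 3, 4, 5]` this gives `l1 = 3`, `l2 = 1`; `lmoments3` extends the same construction to higher-order ratios (L-skewness, L-kurtosis).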
10 changes: 6 additions & 4 deletions src/silvimetric/__init__.py
@@ -3,12 +3,14 @@
from .resources.bounds import Bounds
from .resources.extents import Extents
from .resources.storage import Storage
from .resources.metric import Metric, Metrics
from .resources.metric import Metric, run_metrics
from .resources.metrics import grid_metrics, l_moments, percentiles, statistics, all_metrics
from .resources.metrics import product_moments
from .resources.log import Log
from .resources.data import Data
from .resources.entry import Attribute, Pdal_Attributes, Attributes
from .resources.config import StorageConfig, ShatterConfig, ExtractConfig, ApplicationConfig
from .resources.array_extensions import AttributeArray, AttributeDtype
from .resources.attribute import Attribute, Pdal_Attributes, Attributes
from .resources.config import StorageConfig, ShatterConfig, ExtractConfig
from .resources.config import ApplicationConfig

from .commands.shatter import shatter
from .commands.extract import extract
15 changes: 7 additions & 8 deletions src/silvimetric/cli/cli.py
@@ -16,14 +16,14 @@
@click.option("--debug", is_flag=True, default=False, help="Changes logging level from INFO to DEBUG.")
@click.option("--log-dir", default=None, help="Directory for log output", type=str)
@click.option("--progress", is_flag=True, default=True, type=bool, help="Report progress")
@click.option("--workers", type=int, default=10, help="Number of workers for Dask")
@click.option("--threads", type=int, default=4, help="Number of threads per worker for Dask")
@click.option("--workers", type=int, help="Number of workers for Dask")
@click.option("--threads", type=int, help="Number of threads per worker for Dask")
@click.option("--watch", is_flag=True, default=False, type=bool,
help="Open dask diagnostic page in default web browser.")
@click.option("--dasktype", default='processes', type=click.Choice(['threads',
'processes']), help="What Dask uses for parallelization. For more"
"information see here https://docs.dask.org/en/stable/scheduling.html#local-threads")
@click.option("--scheduler", default='distributed', type=click.Choice(['distributed',
@click.option("--scheduler", default='local', type=click.Choice(['distributed',
'local', 'single-threaded']), help="Type of dask scheduler. Both are "
"local, but are run with different dask libraries. See more here "
"https://docs.dask.org/en/stable/scheduling.html.")
@@ -133,14 +133,13 @@ def scan_cmd(app, resolution, point_count, pointcloud, bounds, depth, filter):
help="Coordinate system of data")
@click.option("--attributes", "-a", multiple=True, type=AttrParamType(),
help="List of attributes to include in Database")
@click.option("--metrics", "-m", multiple=True, type=MetricParamType(),
help="List of metrics to include in Database")
@click.option("--metrics", "-m", type=MetricParamType(), default=[],
help="List of metrics to include in output, eg. '-m stats,percentiles'")
@click.option("--resolution", type=float, default=30.0,
help="Summary pixel resolution")
@click.pass_obj
def initialize_cmd(app: ApplicationConfig, bounds: Bounds, crs: pyproj.CRS,
attributes: list[Attribute], resolution: float, metrics: list[Metric]):
import itertools
"""Initialize silvimetrics DATABASE"""

storageconfig = StorageConfig(tdb_dir = app.tdb_dir,
@@ -205,8 +204,8 @@ def shatter_cmd(app, pointcloud, bounds, report, tilesize, date, dates):
@cli.command('extract')
@click.option("--attributes", "-a", multiple=True, type=AttrParamType(), default=[],
help="List of attributes to include output")
@click.option("--metrics", "-m", multiple=True, type=MetricParamType(), default=[],
help="List of metrics to include in output")
@click.option("--metrics", "-m", type=MetricParamType(), default=[],
help="List of metrics to include in output, eg. '-m stats,percentiles'")
@click.option("--bounds", type=BoundsParamType(), default=None,
help="Bounds for data to include in output")
@click.option("--outdir", "-o", type=click.Path(exists=False), required=True,
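The cli.py changes drop the hard-coded worker/thread defaults and make `local` the default scheduler. A simplified, dependency-free sketch of the resulting precedence — a hypothetical helper mirroring, not reproducing, `dask_handle`:

```python
def dask_settings(scheduler, dasktype, workers=None, threads=None):
    """Build the dask config dict the CLI would pass to dask.config.set.

    workers/threads are only set when given, since the new CLI defaults
    are None rather than fixed numbers.
    """
    cfg = {}
    if workers is not None:
        cfg['n_workers'] = workers
    if threads is not None:
        cfg['threads_per_worker'] = threads
    if scheduler == 'local':
        # fall back to dasktype ('threads' or 'processes') for the scheduler
        cfg['scheduler'] = dasktype
    elif scheduler == 'single-threaded':
        cfg['scheduler'] = 'single-threaded'
    return cfg
```

With the new defaults, `dask_settings('local', 'processes')` yields only `{'scheduler': 'processes'}`, letting Dask pick worker counts itself.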
101 changes: 64 additions & 37 deletions src/silvimetric/cli/common.py
@@ -5,8 +5,10 @@
import dask
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
from ..resources.metrics import l_moments, percentiles, statistics, product_moments
from ..resources.metrics import aad, grid_metrics, all_metrics

from .. import Bounds, Attribute, Metric, Attributes, Metrics, Log
from .. import Bounds, Attribute, Metric, Attributes, Log


class BoundsParamType(click.ParamType):
@@ -22,7 +24,7 @@ def convert(self, value, param, ctx):
class CRSParamType(click.ParamType):
name = "CRS"

def convert(self, value, param, ctx):
def convert(self, value, param, ctx) -> pyproj.CRS:
try:
crs = pyproj.CRS.from_user_input(value)
return crs
@@ -44,40 +46,68 @@ def convert(self, value, param, ctx) -> list[Attribute]:
self.fail(f"{value!r} is of an invalid type, {e}", param, ctx)

class MetricParamType(click.ParamType):
name="Metrics"
name="metrics"
def convert(self, value, param, ctx) -> list[Metric]:
if '.py' in value:
try:
import importlib.util
import os
from pathlib import Path

cwd = os.getcwd()
p = Path(cwd, value)
if not p.exists():
self.fail("Failed to find import file for metrics at"
f" {str(p)}", param, ctx)

spec = importlib.util.spec_from_file_location('user_metrics', str(p))
user_metrics = importlib.util.module_from_spec(spec)
spec.loader.exec_module(user_metrics)
ms = user_metrics.metrics()
except Exception as e:
self.fail(f"Failed to import metrics from {str(p)} with error {e}",
param, ctx)

for m in ms:
if not isinstance(m, Metric):
self.fail(f"Invalid Metric supplied: {m}")
return user_metrics.metrics()

try:
return Metrics[value]
except Exception as e:
self.fail(f"{value!r} is not available in Metrics, {e}", param, ctx)
if value is None or not value:
return list(all_metrics.values())
parsed_values = value.split(',')
metrics: set[Metric] = set()
for val in parsed_values:
if '.py' in val:
# user imported metrics from external file
try:
import importlib.util
import os
from pathlib import Path

cwd = os.getcwd()
p = Path(cwd, val)
if not p.exists():
self.fail("Failed to find import file for metrics at"
f" {str(p)}", param, ctx)

spec = importlib.util.spec_from_file_location('user_metrics', str(p))
user_metrics = importlib.util.module_from_spec(spec)
spec.loader.exec_module(user_metrics)
ms = user_metrics.metrics()
except Exception as e:
self.fail(f"Failed to import metrics from {str(p)} with error {e}",
param, ctx)

for m in ms:
if not isinstance(m, Metric):
self.fail(f"Invalid Metric supplied: {m}")

metrics.update(list(user_metrics.metrics()))
else:
# SilviMetric defined metrics
try:
if val == 'stats':
metrics.update(list(statistics.values()))
elif val == 'p_moments':
metrics.update(list(product_moments.values()))
elif val == 'l_moments':
metrics.update(list(l_moments.values()))
elif val == 'percentiles':
metrics.update(list(percentiles.values()))
elif val == 'aad':
metrics.update(list(aad.aad.values()))
elif val == 'grid_metrics':
metrics.update(list(grid_metrics.values()))
elif val == 'all':
metrics.update(list(all_metrics.values()))
else:
m = all_metrics[val]
if isinstance(m, Metric):
metrics.add(m)
else:
metrics.update(list(m))
except Exception as e:
self.fail(f"{val!r} is not available in Metrics", param, ctx)
return list(metrics)

def dask_handle(dasktype: str, scheduler: str, workers: int, threads: int,
watch: bool, log: Log):
watch: bool, log: Log) -> None:
dask_config = { }

if dasktype == 'threads':
Expand All @@ -88,9 +118,6 @@ def dask_handle(dasktype: str, scheduler: str, workers: int, threads: int,
dask_config['threads_per_worker'] = threads

if scheduler == 'local':
if scheduler != 'distributed':
log.warning("Selected scheduler type does not support continuously"
"updated config information.")
# fall back to dask type to determine the scheduler type
dask_config['scheduler'] = dasktype
if watch:
Expand All @@ -117,7 +144,7 @@ def dask_handle(dasktype: str, scheduler: str, workers: int, threads: int,

dask.config.set(dask_config)

def close_dask():
def close_dask() -> None:
client = dask.config.get('distributed.client')
if isinstance(client, Client):
client.close()
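The rewritten `MetricParamType.convert` above accepts one comma-separated string whose tokens are either group names (`stats`, `percentiles`, `l_moments`, ...) or individual metric names. A stripped-down sketch of that parsing, using an illustrative registry rather than SilviMetric's real one:

```python
def parse_metric_groups(value, groups):
    """Expand a comma-separated metric spec into a deduplicated set.

    Each token is either a key in `groups` (expanded to its members) or
    taken as a single metric name. Group/metric names here are examples.
    """
    selected = set()
    for token in value.split(','):
        if token in groups:
            selected.update(groups[token])
        else:
            selected.add(token)
    return selected
```

For example, with `groups = {'stats': ['min', 'max', 'mean']}`, the spec `'stats,p90'` expands to the set `{'min', 'max', 'mean', 'p90'}` — matching the CLI's new `-m "stats,percentiles"` style.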
3 changes: 2 additions & 1 deletion src/silvimetric/commands/extract.py
@@ -2,6 +2,7 @@
from itertools import chain


from typing import Union
from osgeo import gdal, osr
import dask
import numpy as np
@@ -54,7 +55,7 @@ def write_tif(xsize: int, ysize: int, data:np.ndarray, name: str,
tif.FlushCache()
tif = None

def get_metrics(data_in: pd.DataFrame, storage: Storage):
def get_metrics(data_in: pd.DataFrame, storage: Storage) -> Union[None, pd.DataFrame]:
"""
Reruns a metric over this cell. Only called if there is overlapping data.

8 changes: 4 additions & 4 deletions src/silvimetric/commands/scan.py
@@ -47,15 +47,15 @@ def scan(tdb_dir: str, pointcloud: str, bounds: Bounds, point_count:int=600000,
cell_counts = extent_handle(extents, data, resolution, point_count,
depth, log)


num_cells = np.sum(cell_counts).item()
std = np.std(cell_counts)
mean = np.mean(cell_counts)
rec = int(mean + std)

pc_info = dict(pc_info=dict(storage_bounds=tdb.config.root.to_json(),
data_bounds=data.bounds.to_json(), count=dask.compute(count)))
tiling_info = dict(tile_info=dict(num_cells=len(cell_counts), mean=mean,
std_dev=std, recommended=rec))
data_bounds=data.bounds.to_json(), count=dask.compute(count)))
tiling_info = dict(tile_info=dict(num_cells=num_cells,
num_tiles=len(cell_counts), mean=mean, std_dev=std, recommended=rec))

final_info = pc_info | tiling_info
logger.info(json.dumps(final_info, indent=2))
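The scan.py change above reports total cell count and tile count separately, and recommends a tile size of the mean per-tile cell count plus one standard deviation. A small sketch of that summary (hypothetical helper name; `np.std` with its default `ddof=0`, as in the diff):

```python
import numpy as np

def tile_summary(cell_counts):
    """Summarize per-tile cell counts the way scan's tiling_info does."""
    counts = np.asarray(cell_counts)
    mean = counts.mean()
    std = counts.std()            # population std, matching np.std default
    return {
        'num_cells': int(counts.sum()),
        'num_tiles': counts.size,
        'mean': float(mean),
        'std_dev': float(std),
        'recommended': int(mean + std),   # mean + 1 std, truncated to int
    }
```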