From b76627e3dcfee29ef8e306cc5e398a178cfcec73 Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sat, 3 Feb 2024 02:57:35 -0500
Subject: [PATCH 1/6] Tag-based overhaul

---
 .github/workflows/ci.yml                      |   8 ++
 Dockerfile                                    |  30 ++--
 print_fetal_brain_atlases_options.py          |  83 -----------
 pubchrisvisual/one.py                         | 133 ------------------
 requirements.txt                              |   3 +
 setup.py                                      |  13 +-
 tests/__init__.py                             |   0
 tests/examples.py                             |  25 ++++
 tests/test_index.py                           |  61 ++++++++
 {pubchrisvisual => visualdataset}/__init__.py |   2 +-
 visualdataset/__main__.py                     |  47 +++++++
 visualdataset/args_types.py                   |  15 ++
 visualdataset/index_nifti_dir.py              |  29 ++++
 visualdataset/json_arg_parser.py              |  42 ++++++
 visualdataset/manifest.py                     |  57 ++++++++
 visualdataset/nifti_dataset.py                |  81 +++++++++++
 visualdataset/nifti_sidecar.py                |  23 +++
 .../types.py => visualdataset/settings.py     |   6 +-
 18 files changed, 419 insertions(+), 239 deletions(-)
 delete mode 100755 print_fetal_brain_atlases_options.py
 delete mode 100644 pubchrisvisual/one.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/examples.py
 create mode 100644 tests/test_index.py
 rename {pubchrisvisual => visualdataset}/__init__.py (95%)
 create mode 100644 visualdataset/__main__.py
 create mode 100644 visualdataset/args_types.py
 create mode 100644 visualdataset/index_nifti_dir.py
 create mode 100644 visualdataset/json_arg_parser.py
 create mode 100644 visualdataset/manifest.py
 create mode 100644 visualdataset/nifti_dataset.py
 create mode 100644 visualdataset/nifti_sidecar.py
 rename pubchrisvisual/types.py => visualdataset/settings.py (90%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 80c0f08..ca30d75 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -81,6 +81,14 @@ jobs:
           tags: ${{ steps.info.outputs.local_tag }}
           load: true
           cache-from: type=gha
+          build-args: extras_require=dev
+
+      - name: Unit tests
+        run: |
+          docker run --rm \
+            -v '${{ github.workspace }}:/src' -w /src \
+            ${{ steps.info.outputs.local_tag }} \
+            pytest -v --color=yes -o cache_dir=/tmp/pytest
 
       - name: Login to DockerHub
         if: (github.event_name == 'push' || github.event_name == 'release') && contains(steps.info.outputs.tags_csv, 'docker.io')
diff --git a/Dockerfile b/Dockerfile
index 7e481b3..7c3508f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,22 +1,26 @@
-# Python version can be changed, e.g.
-# FROM python:3.8
-# FROM ghcr.io/mamba-org/micromamba:1.5.1-focal-cuda-11.3.1
-FROM docker.io/python:3.12.1-slim-bookworm
+FROM docker.io/mambaorg/micromamba:1.5.5-bookworm-slim AS micromamba
+FROM micromamba AS builder
 
-LABEL org.opencontainers.image.authors="FNNDSC <dev@babymri.org>" \
-      org.opencontainers.image.title="Publish ChRIS Public Dataset" \
-      org.opencontainers.image.description="Mark the outputs of a feed as compatible with the public dataset viewer feature of ChRIS_ui."
+RUN \
+    --mount=type=cache,sharing=private,target=/home/mambauser/.mamba/pkgs,uid=57439,gid=57439 \
+    --mount=type=cache,sharing=private,target=/opt/conda/pkgs,uid=57439,gid=57439 \
+    micromamba -y -n base install -c conda-forge python=3.12.1 nibabel=5.2.0 numpy=1.26.3 tqdm=4.66.1 pydantic=2.6.0
 
-ARG SRCDIR=/usr/local/src/pl-visual-dataset
+ARG SRCDIR=/home/mambauser/pl-visual-dataset
+RUN mkdir "${SRCDIR}"
 WORKDIR ${SRCDIR}
 
 COPY requirements.txt .
-RUN --mount=type=cache,sharing=private,target=/root/.cache/pip pip install -r requirements.txt
+ARG MAMBA_DOCKERFILE_ACTIVATE=1
+RUN pip install -r requirements.txt
 
-COPY . .
+COPY --chown=mambauser:mambauser . .
 ARG extras_require=none
-RUN pip install ".[${extras_require}]" \
-    && cd / && rm -rf ${SRCDIR}
+RUN pip install ".[${extras_require}]" && cd / && rm -rf ${SRCDIR}
 WORKDIR /
 
-CMD ["pub"]
+CMD ["visualdataset"]
+
+LABEL org.opencontainers.image.authors="FNNDSC <dev@babyMRI.org>" \
+      org.opencontainers.image.title="Create ChRIS Visual Dataset" \
+      org.opencontainers.image.description="Prepare a dataset for visualization with ChRIS_ui"
diff --git a/print_fetal_brain_atlases_options.py b/print_fetal_brain_atlases_options.py
deleted file mode 100755
index b743209..0000000
--- a/print_fetal_brain_atlases_options.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-"""
-Notes:
-
-```
-pubone --order 'kiho.nii.gz,serag.nii.gz,ali.nii.gz,aliexp.nii.gz' \
-       --options "$(./print_atlas_options.py)" \
-       --readme "Fetal brain T2 MRI atlas datasets curated by the Fetal-Neonatal Neuroimaging Developmental Science Center. https://www.fnndsc.org/" \
-       incoming/ outgoing/
-```
-"""
-import json
-import sys
-
-from pubchrisvisual.types import ChrisViewerFileOptions, NiivueVolumeOptions
-
-
-MRI_OPTIONS = NiivueVolumeOptions(colormap="gray", colorbarVisible=False)
-LABEL_OPTIONS = NiivueVolumeOptions(colormap="roi_i256", colorbarVisible=False)
-
-CRL_MRI_OPTIONS = ChrisViewerFileOptions(
-    name="T2 MRI",
-    author="CRL (Ali Gholipour et al.)",
-    description="Fetal T2 atlas developed by the Computational Radiology Laboratory of "
-                   "Boston Children's Hospital, Harvard Medical School.",
-    website="http://crl.med.harvard.edu/research/fetal_brain_atlas/",
-    citation=[
-        "A Gholipour, CK Rollins, C Velasco-Annis, A Ouaalam, A Akhondi-Asl, O Afacan, C Ortinau, S Clancy, "
-        "C Limperopoulos, E Yang, JA Estroff, and SK Warfield. A normative spatiotemporal MRI atlas of the "
-        "fetal brain for automatic segmentation and analysis of early brain growth, Scientific Reports 7, "
-        "Article number: 476 (2017). http://www.nature.com/articles/s41598-017-00525-w",
-        "A Gholipour, C Limperopoulos, S Clancy, C Clouchoux, A Akhondi-Asl, J A Estroff, and S K Warfield. "
-        "Construction of a Deformable Spatiotemporal MRI Atlas of the Fetal Brain: Evaluation of Similarity "
-        "Metrics and Deformation Models. MICCAI 2014.",
-        "S Khan, L Vasung, B Marami, CK Rollins, O Afacan, C Ortinau, E Yang, SK Warfield, and A Gholipour. "
-        "Fetal Brain Growth Portrayed by a Spatiotemporal Diffusion Tensor MRI Atlas Computed From In Utero "
-        "Images. NeuroImage 2018. https://doi.org/10.1016/j.neuroimage.2018.08.030"
-    ],
-    niivue_defaults=MRI_OPTIONS
-)
-
-CRL_REGIONAL_OPTIONS = CRL_MRI_OPTIONS | ChrisViewerFileOptions(
-    name="Regional cortex parcellation",
-    description="Regional cortex parcellation of the CRL fetal brain atlas.",
-    niivue_defaults=LABEL_OPTIONS
-)
-
-CRL_TISSUE_OPTIONS = CRL_MRI_OPTIONS | ChrisViewerFileOptions(
-    name="Tissue segmentation (\"Olympic edition\")",
-    description="Tissue segmentation of the CRL fetal brain atlas.",
-    niivue_defaults=LABEL_OPTIONS
-)
-
-KIHO_MRI_OPTIONS = ChrisViewerFileOptions(
-    name="T2 MRI",
-    author="FNNDSC (Kiho Im et al)",
-    description="Fetal T2 atlas developed by the MRI group of the Fetal-Neonatal Neuroimaging Developmental Science "
-                "Center at the Boston Children's Hospital",
-    website="https://research.childrenshospital.org/neuroim/",
-    niivue_defaults=MRI_OPTIONS
-)
-
-SERAG_MRI_OPTIONS = ChrisViewerFileOptions(
-    name="T2 MRI",
-    author="Imperial College London (Serag et al.)",
-    description="Fetal T2 atlas developed at the Imperial College London.",
-    website="https://brain-development.org/brain-atlases/fetal-brain-atlases/fetal-brain-atlas-serag/",
-    niivue_defaults=MRI_OPTIONS
-)
-
-FILENAME_MAPPING: dict[str, ChrisViewerFileOptions] = {
-    "kiho.nii.gz": KIHO_MRI_OPTIONS,
-    "serag.nii.gz": SERAG_MRI_OPTIONS,
-    "ali.nii.gz": CRL_MRI_OPTIONS,
-    "aliexp.nii.gz": CRL_MRI_OPTIONS,
-    "ali_tissue.nii.gz": CRL_TISSUE_OPTIONS,
-    "aliexp_tissue.nii.gz": CRL_TISSUE_OPTIONS,
-    "ali_regional.nii.gz": CRL_REGIONAL_OPTIONS,
-    "aliexp_regional.nii.gz": CRL_REGIONAL_OPTIONS
-}
-
-if __name__ == "__main__":
-    json.dump(FILENAME_MAPPING, sys.stdout, indent=2)
diff --git a/pubchrisvisual/one.py b/pubchrisvisual/one.py
deleted file mode 100644
index a8ec81b..0000000
--- a/pubchrisvisual/one.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/env python
-import copy
-import json
-import shutil
-import sys
-from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter
-from pathlib import Path
-from typing import Iterable, Sequence
-
-from chris_plugin import chris_plugin
-from pydantic import TypeAdapter, ConfigDict, ValidationError
-
-from pubchrisvisual import DISPLAY_TITLE
-from pubchrisvisual.types import NiivueVolumeOptions, ChrisViewerFileOptions
-
-parser = ArgumentParser(description='Adds options for viewing one file of each subject using ChRIS_ui.',
-                        formatter_class=ArgumentDefaultsHelpFormatter)
-parser.add_argument('--order', type=str,
-                    help='Order of preference for file names as a comma-separated list')
-parser.add_argument('--options', type=str, default='{}',
-                    help='Mapping of file names to default Niivue options. '
-                         'Should either be a relative path or stringified JSON')
-parser.add_argument('--readme', type=str,
-                    help='README file content')
-
-VISIBLE = NiivueVolumeOptions(opacity=1.0)
-INVISIBLE = NiivueVolumeOptions(opacity=0.0)
-
-_OPTIONS_MAPPING_ADAPTER = TypeAdapter(dict[str, ChrisViewerFileOptions])
-_OPTIONS_ADAPTER = TypeAdapter(ChrisViewerFileOptions)
-
-
-@chris_plugin(
-    parser=parser,
-    title='Single Volume ChRIS Visual Dataset',
-    category='Utility',
-    min_memory_limit='256Mi',
-    min_cpu_limit='200m',
-)
-def main(options: Namespace, inputdir: Path, outputdir: Path):
-    configs = deserialize_mapping(path_or_fname(inputdir, options.options))
-    order = [name.strip() for name in options.order.split(',')] if options.order else []
-    print(DISPLAY_TITLE, flush=True)
-    shutil.copytree(inputdir, outputdir, dirs_exist_ok=True)
-    for folder in subject_folders(outputdir):
-        files = [p for p in folder.glob('*.nii.gz') if p.is_file()]
-        preferred = get_preferred_file(files, order)
-        for file in files:
-            base_niivue_config: NiivueVolumeOptions = VISIBLE if file is preferred else INVISIBLE
-            file_config: ChrisViewerFileOptions = copy.deepcopy(configs[file.name]) if file.name in configs else {}
-            if 'niivue_defaults' not in file_config:
-                file_config['niivue_defaults'] = {}
-            file_config['niivue_defaults'] = base_niivue_config | file_config['niivue_defaults']
-
-            if file.name not in configs:
-                print(f"warning: no file name given by --options matches {file}")
-            sidecar = file.with_suffix(file.suffix + '.chrisvisualdataset.volume.json')
-            with sidecar.open('wb') as f:
-                f.write(_OPTIONS_ADAPTER.dump_json(file_config))
-
-    if options.readme is not None:
-        (outputdir / 'README.txt').write_text(options.readme)
-
-    if not options.options.startswith('{'):
-        delete_file_and_empty_parents(outputdir, options.options)
-
-    (outputdir / '.chrisvisualdataset.root.json').write_text('{}')
-
-
-def get_preferred_file(files: Sequence[Path], order: Sequence[str]) -> Path:
-    for preferred_name in order:
-        for file in files:
-            if file.name == preferred_name:
-                return file
-    return files[0]
-
-
-def subject_folders(p: Path) -> Iterable[Path]:
-    return filter(is_dir_containing_nifti, p.glob('*'))
-
-
-def is_dir_containing_nifti(p: Path) -> bool:
-    if not p.is_dir():
-        return False
-    return next(filter(is_nifti_file, p.glob('*.nii.gz')), None) is not None
-
-
-def is_nifti_file(p: Path):
-    return p.is_file() and p.name.endswith('.nii.gz')
-
-
-def is_nifti(p: Path) -> bool:
-    return p.suffix == '.nii.gz'
-
-
-def deserialize_mapping(x: str) -> dict[str, ChrisViewerFileOptions]:
-    try:
-        return _OPTIONS_MAPPING_ADAPTER.validate_json(x, strict=True)
-    except ValidationError as e:
-        print("Invalid value for --options")
-        for error in e.errors():
-            if 'url' in error:
-                del error['url']
-            print(json.dumps(error))
-        sys.exit(1)
-
-
-def path_or_fname(parent_dir: Path, value: str):
-    if value.startswith('{'):
-        return value
-    p = parent_dir / value
-    return p.read_text() if p.is_file() else value
-
-
-def delete_file_and_empty_parents(root: Path, fname: str):
-    p = root / fname
-    if not p.is_file():
-        return
-    p.unlink()
-    delete_empty_dirs(p.parent.resolve(), root.resolve())
-
-
-def delete_empty_dirs(p: Path, root: Path):
-    if p.resolve() == root:
-        return
-    if next(p.glob('*'), None) is not None:
-        return
-    p.rmdir()
-    delete_empty_dirs(p.parent, root)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/requirements.txt b/requirements.txt
index b156c18..76a6082 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
 chris_plugin==0.4.0
+nibabel~=5.2.0
+numpy~=1.26.3
+tqdm~=4.66.1
 pydantic~=2.6.0
diff --git a/setup.py b/setup.py
index ebeb77b..d674c90 100644
--- a/setup.py
+++ b/setup.py
@@ -19,18 +19,18 @@ def get_version(rel_path: str) -> str:
 
 
 setup(
-    name='publish-chris-dataset',
-    version=get_version('pubchrisvisual/__init__.py'),
-    description='Mark the outputs of a feed as compatible with the public dataset viewer feature of ChRIS_ui.',
+    name='chrisvisualdataset',
+    version=get_version('visualdataset/__init__.py'),
+    description='Prepare the outputs of a feed for the "visual datasets" feature of ChRIS_ui.',
     author='FNNDSC',
     author_email='dev@babymri.org',
     url='https://github.com/FNNDSC/pl-visual-dataset',
-    packages=['pubchrisvisual'],
+    packages=['visualdataset'],
     install_requires=['chris_plugin'],
     license='MIT',
     entry_points={
         'console_scripts': [
-            'pubone = pubchrisvisual.one:main'
+            'visualdataset = visualdataset.__main__:main'
         ]
     },
     classifiers=[
@@ -42,7 +42,8 @@ def get_version(rel_path: str) -> str:
     extras_require={
         'none': [],
         'dev': [
-            'pytest~=7.1'
+            'pytest~=8.0',
+            'pytest-unordered~=0.5.2'
         ]
     }
 )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/examples.py b/tests/examples.py
new file mode 100644
index 0000000..0706bc6
--- /dev/null
+++ b/tests/examples.py
@@ -0,0 +1,25 @@
+import sys
+from typing import Sequence
+
+from pydantic import TypeAdapter
+
+from visualdataset.args_types import Matcher
+
+
+FETAL_ATLAS_MATCHERS: Sequence[Matcher] = [
+    *(Matcher(key='age', value=str(age), regex=f'Age {age}/') for age in range(10, 40, 1)),
+
+    Matcher(key='author', value='Ahmed Serag et al.', regex=r'/serag\.nii\.gz$'),
+    Matcher(key='author', value="Ali Gholipour et al., CRL", regex=r'/ali.*\.nii\.gz$'),
+    Matcher(key='author', value="Kiho Im et al., FNNDSC", regex=r'/kiho\.nii\.gz$'),
+
+    Matcher(key='institution', value="Boston Children's Hospital", regex=r'/(kiho|ali).*\.nii\.gz$'),
+    Matcher(key='institution', value="Imperial College London", regex=r'/serag\.nii\.gz$'),
+
+    Matcher(key='type', value='mri', regex=r'/(ali|aliexp|kiho|serag)\.nii\.gz$'),
+    Matcher(key='type', value='segmentation', regex=r'/(ali|aliexp)_.+\.nii\.gz$'),
+]
+
+if __name__ == '__main__':
+    adapter = TypeAdapter(Sequence[Matcher])
+    print(adapter.dump_json(FETAL_ATLAS_MATCHERS).decode('utf-8'))
diff --git a/tests/test_index.py b/tests/test_index.py
new file mode 100644
index 0000000..5eec4a9
--- /dev/null
+++ b/tests/test_index.py
@@ -0,0 +1,61 @@
+from pathlib import Path
+import pytest
+from pytest_unordered import unordered
+
+from visualdataset.index_nifti_dir import index_nifti_dir
+from tests.examples import FETAL_ATLAS_MATCHERS
+from visualdataset.manifest import VisualDatasetFile
+
+
+def test_index_dir(tmp_path: Path):
+    example_files = [
+        'Age 36/serag.nii.gz',
+        'Age 37/ali.nii.gz',
+        'Age 37/ali_regional.nii.gz',
+        'Age 37/ali_tissue.nii.gz',
+    ]
+    for example in example_files:
+        p = tmp_path / example
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.touch()
+
+    actual = list(index_nifti_dir(tmp_path, FETAL_ATLAS_MATCHERS))
+    expected = [
+        VisualDatasetFile(
+            path='Age 36/serag.nii.gz',
+            tags={
+                'age': '36',
+                'author': 'Ahmed Serag et al.',
+                'institution': 'Imperial College London',
+                'type': 'mri'
+            },
+        ),
+        VisualDatasetFile(
+            path='Age 37/ali.nii.gz',
+            tags={
+                'age': '37',
+                'author': 'Ali Gholipour et al., CRL',
+                'institution': "Boston Children's Hospital",
+                'type': 'mri'
+            }
+        ),
+        VisualDatasetFile(
+            path='Age 37/ali_regional.nii.gz',
+            tags={
+                'age': '37',
+                'author': 'Ali Gholipour et al., CRL',
+                'institution': "Boston Children's Hospital",
+                'type': 'segmentation'
+            }
+        ),
+        VisualDatasetFile(
+            path='Age 37/ali_tissue.nii.gz',
+            tags={
+                'age': '37',
+                'author': 'Ali Gholipour et al., CRL',
+                'institution': "Boston Children's Hospital",
+                'type': 'segmentation'
+            }
+        ),
+    ]
+    assert actual == unordered(expected)
diff --git a/pubchrisvisual/__init__.py b/visualdataset/__init__.py
similarity index 95%
rename from pubchrisvisual/__init__.py
rename to visualdataset/__init__.py
index 65b9b6f..a0ef5dd 100644
--- a/pubchrisvisual/__init__.py
+++ b/visualdataset/__init__.py
@@ -9,4 +9,4 @@
 |_|
 """
 
-__version__ = '0.0.5'
+__version__ = '0.1.0'
diff --git a/visualdataset/__main__.py b/visualdataset/__main__.py
new file mode 100644
index 0000000..5d88ea3
--- /dev/null
+++ b/visualdataset/__main__.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter
+from pathlib import Path
+
+from chris_plugin import chris_plugin
+from pydantic import TypeAdapter
+
+from visualdataset import DISPLAY_TITLE
+from visualdataset.json_arg_parser import parse_args
+from visualdataset.nifti_dataset import nifti_dataset
+from visualdataset.settings import ChrisViewerFileOptions
+
+parser = ArgumentParser(description='Prepares a dataset for use with the ChRIS_ui '
+                                    '"Visual Datasets" feature.',
+                        formatter_class=ArgumentDefaultsHelpFormatter)
+parser.add_argument('--matchers', type=str, required=True,
+                    help='Regular expressions used to assign tags to files')
+parser.add_argument('--options', type=str,
+                    help='Metadata to go with tag sets')
+parser.add_argument('-s', '--string-args', action='store_true',
+                    help='Interpret --matchers and --options as data instead of paths')
+parser.add_argument('--first-run-files', type=str,
+                    help='List of files to show on first run, '
+                         'as a stringified JSON list of paths relative to inputdir')
+parser.add_argument('--readme', type=str,
+                    help='README file content')
+
+_LIST_ADAPTER = TypeAdapter(list[str])
+
+
+@chris_plugin(
+    parser=parser,
+    title='Single Volume ChRIS Visual Dataset',
+    category='Utility',
+    min_memory_limit='1Gi',
+    min_cpu_limit='1000m',
+)
+def main(options: Namespace, inputdir: Path, outputdir: Path):
+    config_dir = None if options.string_args else inputdir  # -s: flag values are data, not paths
+    matchers, tag_options = parse_args(options.matchers, options.options, config_dir)
+    first_run_files = [] if options.first_run_files is None else _LIST_ADAPTER.validate_json(options.first_run_files)
+    print(DISPLAY_TITLE, flush=True)
+    nifti_dataset(inputdir, outputdir, matchers, tag_options, first_run_files, options.readme)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/visualdataset/args_types.py b/visualdataset/args_types.py
new file mode 100644
index 0000000..b8b6703
--- /dev/null
+++ b/visualdataset/args_types.py
@@ -0,0 +1,15 @@
+import functools
+import re
+
+from pydantic import BaseModel
+
+
+class Matcher(BaseModel):
+    """A rule: files whose path the ``regex`` is found in get the tag ``key`` = ``value``."""
+    key: str
+    value: str
+    regex: str
+
+    @functools.cached_property
+    def re(self):
+        return re.compile(self.regex)
diff --git a/visualdataset/index_nifti_dir.py b/visualdataset/index_nifti_dir.py
new file mode 100644
index 0000000..22a9a4a
--- /dev/null
+++ b/visualdataset/index_nifti_dir.py
@@ -0,0 +1,29 @@
+import os.path
+from pathlib import Path, PurePath
+from typing import Iterator, Sequence
+
+from visualdataset.args_types import Matcher
+from visualdataset.manifest import VisualDatasetFile
+
+
+def index_nifti_dir(input_dir: Path, matchers: Sequence[Matcher]) -> Iterator[VisualDatasetFile]:
+    """
+    Lazily yield an entry for each ``*.nii.gz`` file under ``input_dir`` that received at least one tag.
+    """
+    candidates = input_dir.rglob('*.nii.gz', case_sensitive=False)
+    relative = (found.relative_to(input_dir) for found in candidates if os.path.isfile(found))
+    tagged = (match_file(rel, matchers) for rel in relative)
+    return (entry for entry in tagged if _has_tags(entry))
+
+
+def match_file(path: PurePath, matchers: Sequence[Matcher]) -> VisualDatasetFile:
+    """Tag ``path`` with the key/value of every matcher whose regex is found in its POSIX form."""
+    # as_posix() always uses "/" as the separator, so matcher regexes behave the same on any OS.
+    posix = path.as_posix()
+    hits = (m for m in matchers if m.re.search(posix) is not None)
+    tags = {m.key: m.value for m in hits}
+    return VisualDatasetFile(path=PurePath(path), tags=tags)
+
+
+def _has_tags(match: VisualDatasetFile) -> bool:
+    return bool(match.tags)  # an empty tag mapping means no matcher applied to the file
diff --git a/visualdataset/json_arg_parser.py b/visualdataset/json_arg_parser.py
new file mode 100644
index 0000000..ea5c8d7
--- /dev/null
+++ b/visualdataset/json_arg_parser.py
@@ -0,0 +1,42 @@
+import json
+import sys
+from pathlib import Path
+from typing import Sequence, TypeVar, Type
+
+from pydantic import BaseModel, ValidationError
+
+from visualdataset.args_types import Matcher
+from visualdataset.manifest import OptionsLink
+
+
+def parse_args(matchers: str | None, options: str | None, input_dir: Path | None,
+               ) -> tuple[Sequence[Matcher], Sequence[OptionsLink]]:
+    """Parse --matchers and --options, given as JSON text or as paths under ``input_dir``."""
+    def resolve(arg: str | None) -> str:
+        # An omitted flag counts as an empty JSON list.
+        if arg is None:
+            return '[]'
+        return (input_dir / arg).read_text() if input_dir else arg
+    matchers_str, options_str = resolve(matchers), resolve(options)
+    parsed_matchers = deserialize_list(matchers_str, Matcher, '--matchers')
+    return parsed_matchers, deserialize_list(options_str, OptionsLink, '--options')
+
+
+_M = TypeVar('_M', bound=BaseModel)
+
+
+def deserialize_list(s: str, t: Type[_M], flag: str) -> Sequence[_M]:
+    """Parse ``s`` as a JSON list of ``t``; on bad input print the reason and exit(1)."""
+    try:
+        parsed = json.loads(s)
+    except json.JSONDecodeError:
+        print(f'Invalid value for {flag}: not JSON')
+        sys.exit(1)
+    try:
+        if isinstance(parsed, list):
+            return [t.model_validate(item, strict=True) for item in parsed]
+        print(f'Invalid value for {flag}: not JSON list')
+    except ValidationError as err:
+        print(f"Invalid value for {flag}:")
+        print(err)
+    sys.exit(1)
diff --git a/visualdataset/manifest.py b/visualdataset/manifest.py
new file mode 100644
index 0000000..d1c262b
--- /dev/null
+++ b/visualdataset/manifest.py
@@ -0,0 +1,57 @@
+from pathlib import PurePath
+from pydantic import BaseModel, ConfigDict
+from typing import Sequence, FrozenSet, Tuple, Mapping, Optional, Set
+
+from visualdataset.settings import ChrisViewerFileOptions
+
+
+class VisualDatasetFile(BaseModel):
+    """
+    Index data about a file of a "visual dataset".
+    """
+    path: PurePath
+    """
+    Path of file relative to the plugin instance's output directory.
+    """
+    tags: Mapping[str, str]
+    """
+    Metadata as key-value pairs which identify the file.
+    """
+    has_sidecar: bool = False
+    """
+    Whether or not the file has a corresponding `.chrisvisualdataset.volume.json` sidecar file.
+    """
+    # ``model_config`` is the attribute pydantic reads for BaseModel config; this rejects unknown fields.
+    model_config = ConfigDict(extra='forbid')
+
+
+class OptionsLink(BaseModel):
+    """
+    An association between some options and a set of tags.
+    """
+    match: FrozenSet[Tuple[str, str]]
+    options: ChrisViewerFileOptions
+
+
+class VisualDatasetManifest(BaseModel):
+    """
+    A list of all the files and metadata of a "visual dataset".
+    """
+    tags: Mapping[str, Set[str]]
+    """
+    All known tags and all known values for each tag.
+    """
+    files: Sequence[VisualDatasetFile]
+    """
+    Files in this dataset.
+    """
+    options: Sequence[OptionsLink]
+    """
+    Options for files.
+    """
+    first_run_files: Sequence[int]
+    """
+    Index numbers into ``files`` for which files to show when the viewer is first opened.
+    """
+    # ``model_config`` is the attribute pydantic reads for BaseModel config; this rejects unknown fields.
+    model_config = ConfigDict(extra='forbid')
diff --git a/visualdataset/nifti_dataset.py b/visualdataset/nifti_dataset.py
new file mode 100644
index 0000000..06c94e5
--- /dev/null
+++ b/visualdataset/nifti_dataset.py
@@ -0,0 +1,81 @@
+import sys
+from pathlib import Path
+from typing import Sequence, Optional, Mapping, Set
+
+from tqdm import tqdm
+
+from visualdataset.args_types import Matcher
+from visualdataset.index_nifti_dir import index_nifti_dir
+from visualdataset.manifest import VisualDatasetFile, OptionsLink, VisualDatasetManifest
+from visualdataset.nifti_sidecar import create_sidecar
+
+
+def nifti_dataset(
+        input_dir: Path,
+        output_dir: Path,
+        matchers: Sequence[Matcher],
+        options: Sequence[OptionsLink],
+        first_run_files: Sequence[str],
+        readme: Optional[str]
+):
+    """
+    Index the NIfTI files under ``input_dir``, then write one sidecar per indexed file, the tag manifest,
+    and (when ``readme`` is given) ``README.txt`` into ``output_dir``. Exits with status 1 if nothing matched.
+    """
+    # Wrap the scan iterator itself so the progress bar advances as files are found.
+    scan = tqdm(index_nifti_dir(input_dir, matchers), desc='Scanning input directory...')
+    index = [found.model_copy(update={'has_sidecar': True}) for found in scan]
+
+    if not index:
+        print(f'Error: nothing matched for: {[m.regex for m in matchers]}')
+        sys.exit(1)
+
+    first_run_index_nums = find_first_run_files(input_dir, index, first_run_files)
+    for file in tqdm(index, desc='Writing outputs'):
+        output_path = output_dir / file.path
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        sidecar_path = output_path.with_suffix(output_path.suffix + '.chrisvisualdataset.volume.json')
+        create_sidecar(input_dir / file.path, sidecar_path)
+
+    manifest = VisualDatasetManifest(
+        tags=aggregate_tags(index),
+        files=index,
+        options=options,
+        first_run_files=first_run_index_nums
+    )
+    manifest_path = output_dir / '.chrisvisualdataset.tagmanifest.json'
+    manifest_path.write_text(manifest.model_dump_json())
+    if readme is not None:
+        (output_dir / 'README.txt').write_text(readme)
+
+
+def aggregate_tags(index: Sequence[VisualDatasetFile]) -> Mapping[str, Set[str]]:
+    """
+    Collect every tag key seen in ``index`` together with the set of values it takes.
+    """
+    seen = {}
+    for entry in index:
+        for tag_name, tag_value in entry.tags.items():
+            # setdefault creates the value set the first time a key shows up
+            known_values = seen.setdefault(tag_name, set())
+            known_values.add(tag_value)
+    return seen
+
+
+def find_first_run_files(
+        input_dir: Path,
+        index: Sequence[VisualDatasetFile],
+        first_run_files: Sequence[str]
+) -> Sequence[int]:
+    """
+    Return each ``first_run_files`` path's position in ``index``; print and exit(1) on the first path not indexed.
+    """
+    # One pass builds a path -> first position table, so each lookup below is O(1) instead of a list scan.
+    positions: dict[str, int] = {}
+    for position, indexed in enumerate(index):
+        positions.setdefault(str(indexed.path), position)
+    for wanted in first_run_files:
+        if wanted not in positions:
+            print(f'File was not matched: {wanted}')
+            sys.exit(1)
+    return [positions[wanted] for wanted in first_run_files]
diff --git a/visualdataset/nifti_sidecar.py b/visualdataset/nifti_sidecar.py
new file mode 100644
index 0000000..4ba9e48
--- /dev/null
+++ b/visualdataset/nifti_sidecar.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+import numpy as np
+import nibabel as nib
+from pydantic import TypeAdapter
+
+from visualdataset.settings import NiivueVolumeSettings
+
+_SETTINGS_ADAPTER = TypeAdapter(NiivueVolumeSettings)
+
+
+def create_sidecar(img: Path, output: Path):
+    """Write the intensity range of ``img`` to ``output`` as JSON-serialized Niivue volume settings."""
+    lowest, highest = get_range(img)
+    output.write_bytes(_SETTINGS_ADAPTER.dump_json(NiivueVolumeSettings(cal_min=lowest, cal_max=highest)))
+
+
+def get_range(img: Path) -> tuple[float, float]:
+    """Return the (min, max) voxel intensity of the image at ``img``, ignoring NaN voxels."""
+    data = nib.load(img).get_fdata()
+    finite_min = float(np.nanmin(data))  # plain float; np.min would return NaN if any voxel is NaN
+    finite_max = float(np.nanmax(data))
+    return finite_min, finite_max
diff --git a/pubchrisvisual/types.py b/visualdataset/settings.py
similarity index 90%
rename from pubchrisvisual/types.py
rename to visualdataset/settings.py
index 2edbb01..83ed72c 100644
--- a/pubchrisvisual/types.py
+++ b/visualdataset/settings.py
@@ -6,9 +6,9 @@
 from pydantic import ConfigDict, TypeAdapter, HttpUrl
 
 
-class NiivueVolumeOptions(TypedDict):
+class NiivueVolumeSettings(TypedDict):
     """
-    Options supported by Niivue for volumes.
+    Settings supported by Niivue for volumes.
 
     https://github.com/niivue/niivue-react/blob/d56dcd2b3f58ce854686e77963f3a7a89599765f/src/model.ts#L30-L76
     """
@@ -48,7 +48,7 @@ class ChrisViewerFileOptions(TypedDict):
     """
     Website for the dataset
     """
-    niivue_defaults: NotRequired[NiivueVolumeOptions]
+    niivue_defaults: NotRequired[NiivueVolumeSettings]
     """
     Default volume rendering options
     """

From 4530c86611c6b05e7e7a1848db903d3c73f625d9 Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sat, 3 Feb 2024 02:59:43 -0500
Subject: [PATCH 2/6] Enable testing for PRs

---
 .github/workflows/ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca30d75..064d1c5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,7 +19,6 @@ on:
 jobs:
   build:
     name: Build
-    if: github.event_name == 'push' || github.event_name == 'release'
     runs-on: ubuntu-22.04
 
     steps:

From 10b6b077275cb8c81ff62cdb9dc4f83a91feb31e Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sat, 3 Feb 2024 03:20:13 -0500
Subject: [PATCH 3/6] Rename back to options.py

---
 visualdataset/__main__.py                 | 2 +-
 visualdataset/manifest.py                 | 2 +-
 visualdataset/nifti_sidecar.py            | 2 +-
 visualdataset/{settings.py => options.py} | 0
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename visualdataset/{settings.py => options.py} (100%)

diff --git a/visualdataset/__main__.py b/visualdataset/__main__.py
index 5d88ea3..b118b01 100644
--- a/visualdataset/__main__.py
+++ b/visualdataset/__main__.py
@@ -8,7 +8,7 @@
 from visualdataset import DISPLAY_TITLE
 from visualdataset.json_arg_parser import parse_args
 from visualdataset.nifti_dataset import nifti_dataset
-from visualdataset.settings import ChrisViewerFileOptions
+from visualdataset.options import ChrisViewerFileOptions
 
 parser = ArgumentParser(description='Prepares a dataset for use with the ChRIS_ui '
                                     '"Visual Datasets" feature.',
diff --git a/visualdataset/manifest.py b/visualdataset/manifest.py
index d1c262b..878df86 100644
--- a/visualdataset/manifest.py
+++ b/visualdataset/manifest.py
@@ -2,7 +2,7 @@
 from pydantic import BaseModel, ConfigDict
 from typing import Sequence, FrozenSet, Tuple, Mapping, Optional, Set
 
-from visualdataset.settings import ChrisViewerFileOptions
+from visualdataset.options import ChrisViewerFileOptions
 
 
 class VisualDatasetFile(BaseModel):
diff --git a/visualdataset/nifti_sidecar.py b/visualdataset/nifti_sidecar.py
index 4ba9e48..380da4f 100644
--- a/visualdataset/nifti_sidecar.py
+++ b/visualdataset/nifti_sidecar.py
@@ -4,7 +4,7 @@
 import nibabel as nib
 from pydantic import TypeAdapter
 
-from visualdataset.settings import NiivueVolumeSettings
+from visualdataset.options import NiivueVolumeSettings
 
 _SETTINGS_ADAPTER = TypeAdapter(NiivueVolumeSettings)
 
diff --git a/visualdataset/settings.py b/visualdataset/options.py
similarity index 100%
rename from visualdataset/settings.py
rename to visualdataset/options.py

From b59b215f9d18e843edb083fb349f4a1565c8aed8 Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sat, 3 Feb 2024 03:21:32 -0500
Subject: [PATCH 4/6] Change type of OptionsLink.match to Mapping

---
 visualdataset/manifest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/visualdataset/manifest.py b/visualdataset/manifest.py
index 878df86..e47a4fe 100644
--- a/visualdataset/manifest.py
+++ b/visualdataset/manifest.py
@@ -29,7 +29,7 @@ class OptionsLink(BaseModel):
     """
     An association between some options and a set of tags.
     """
-    match: FrozenSet[Tuple[str, str]]
+    match: Mapping[str, str]
     options: ChrisViewerFileOptions
 
 

From 7dd9d7ece1650316cfc936aff85da38745fb38e7 Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sat, 3 Feb 2024 17:07:30 -0500
Subject: [PATCH 5/6] Print warnings for invalid options

---
 tests/{examples.py => example_matchers.py} | 12 +--
 tests/example_options.py                   | 77 ++++++++++++++++++++
 tests/test_index.py                        | 12 +--
 tests/test_validate.py                     | 85 ++++++++++++++++++++++
 visualdataset/nifti_dataset.py             |  5 ++
 visualdataset/validate.py                  | 56 ++++++++++++++
 6 files changed, 236 insertions(+), 11 deletions(-)
 rename tests/{examples.py => example_matchers.py} (60%)
 create mode 100644 tests/example_options.py
 create mode 100644 tests/test_validate.py
 create mode 100644 visualdataset/validate.py

diff --git a/tests/examples.py b/tests/example_matchers.py
similarity index 60%
rename from tests/examples.py
rename to tests/example_matchers.py
index 0706bc6..63ffae1 100644
--- a/tests/examples.py
+++ b/tests/example_matchers.py
@@ -1,23 +1,23 @@
-import sys
 from typing import Sequence
 
 from pydantic import TypeAdapter
 
 from visualdataset.args_types import Matcher
 
-
 FETAL_ATLAS_MATCHERS: Sequence[Matcher] = [
     *(Matcher(key='age', value=str(age), regex=f'Age {age}/') for age in range(10, 40, 1)),
 
     Matcher(key='author', value='Ahmed Serag et al.', regex=r'/serag\.nii\.gz$'),
-    Matcher(key='author', value="Ali Gholipour et al., CRL", regex=r'/ali.*\.nii\.gz$'),
-    Matcher(key='author', value="Kiho Im et al., FNNDSC", regex=r'/kiho\.nii\.gz$'),
+    Matcher(key='author', value='Ali Gholipour et al., CRL', regex=r'/ali.*\.nii\.gz$'),
+    Matcher(key='author', value='Kiho Im et al., FNNDSC', regex=r'/kiho\.nii\.gz$'),
 
     Matcher(key='institution', value="Boston Children's Hospital", regex=r'/(kiho|ali).*\.nii\.gz$'),
     Matcher(key='institution', value="Imperial College London", regex=r'/serag\.nii\.gz$'),
 
-    Matcher(key='type', value='mri', regex=r'/(ali|aliexp|kiho|serag)\.nii\.gz$'),
-    Matcher(key='type', value='segmentation', regex=r'/(ali|aliexp)_.+\.nii\.gz$'),
+    Matcher(key='type', value='T2 MRI', regex=r'/(ali|aliexp|kiho|serag)\.nii\.gz$'),
+    Matcher(key='type', value='labels', regex=r'/(ali|aliexp)_.+\.nii\.gz$'),
+    Matcher(key='labels', value='tissue', regex=r'/(ali|aliexp)_tissue\.nii\.gz$'),
+    Matcher(key='labels', value='parcellation', regex=r'/(ali|aliexp)_regional\.nii\.gz$'),
 ]
 
 if __name__ == '__main__':
diff --git a/tests/example_options.py b/tests/example_options.py
new file mode 100644
index 0000000..5f57096
--- /dev/null
+++ b/tests/example_options.py
@@ -0,0 +1,77 @@
+from typing import Sequence
+
+from pydantic import TypeAdapter
+
+from visualdataset.manifest import OptionsLink
+from visualdataset.options import ChrisViewerFileOptions, NiivueVolumeSettings
+
+FETAL_ATLAS_OPTIONS: Sequence[OptionsLink] = [  # example OptionsLink values: each pairs a tag match with options
+    OptionsLink(
+        match={'author': 'Ali Gholipour et al., CRL'},
+        options=ChrisViewerFileOptions(
+            author='Ali Gholipour et al., CRL',
+            website='http://crl.med.harvard.edu/research/fetal_brain_atlas/',
+            citation=[
+                "A Gholipour, CK Rollins, C Velasco-Annis, A Ouaalam, A Akhondi-Asl, O Afacan, C Ortinau, S Clancy, "
+                "C Limperopoulos, E Yang, JA Estroff, and SK Warfield. A normative spatiotemporal MRI atlas of the "
+                "fetal brain for automatic segmentation and analysis of early brain growth, Scientific Reports 7, "
+                "Article number: 476 (2017). http://www.nature.com/articles/s41598-017-00525-w",
+                "A Gholipour, C Limperopoulos, S Clancy, C Clouchoux, A Akhondi-Asl, J A Estroff, and S K Warfield. "
+                "Construction of a Deformable Spatiotemporal MRI Atlas of the Fetal Brain: Evaluation of Similarity "
+                "Metrics and Deformation Models. MICCAI 2014.",
+                "S Khan, L Vasung, B Marami, CK Rollins, O Afacan, C Ortinau, E Yang, SK Warfield, and A Gholipour. "
+                "Fetal Brain Growth Portrayed by a Spatiotemporal Diffusion Tensor MRI Atlas Computed From In Utero "
+                "Images. NeuroImage 2018. https://doi.org/10.1016/j.neuroimage.2018.08.030"
+            ],
+        )
+    ),
+    OptionsLink(
+        match={'author': 'Ahmed Serag et al.'},
+        options=ChrisViewerFileOptions(
+            author='Ahmed Serag et al.',
+            website='https://brain-development.org/brain-atlases/fetal-brain-atlases/fetal-brain-atlas-serag/',
+            citation=[
+                'A. Serag, P. Aljabar, G. Ball, S.J. Counsell, J.P. Boardman, M.A. Rutherford, A.D. Edwards, '
+                'J.V. Hajnal, D. Rueckert. “Construction of a consistent high-definition spatio-temporal atlas '
+                'of the developing brain using adaptive kernel regression”. NeuroImage, 59 (3), 2255-65, 2012. '
+                'http://dx.doi.org/10.1016/j.neuroimage.2011.09.062',
+                'A. Serag, V. Kyriakopoulou, P. Aljabar, S.J. Counsell, J.P. Boardman, M.A. Rutherford, '
+                'A.D. Edwards, J.V. Hajnal, D. Rueckert. “A Multi-channel 4D Probabilistic Atlas of the '
+                'Developing Brain: Application to Fetuses and Neonates”. Special Issue of the Annals of '
+                'the British Machine Vision Association, 2012.'
+            ]
+        )
+    ),
+    OptionsLink(
+        match={'author': 'Kiho Im et al., FNNDSC'},
+        options=ChrisViewerFileOptions(
+            author='Kiho Im et al., FNNDSC',
+            website='https://research.childrenshospital.org/neuroim/',
+        )
+    ),
+    OptionsLink(
+        match={'type': 'T2 MRI'},
+        options=ChrisViewerFileOptions(
+            name='T2 MRI',
+            niivue_defaults=NiivueVolumeSettings(colormap='gray')
+        )
+    ),
+    OptionsLink(
+        match={'type': 'labels'},
+        options=ChrisViewerFileOptions(
+            niivue_defaults=NiivueVolumeSettings(colormap='roi_i256')
+        )
+    ),
+    OptionsLink(
+        match={'author': 'Ali Gholipour et al., CRL', 'labels': 'tissue'},  # two pairs in one match
+        options=ChrisViewerFileOptions(name='Tissue segmentation ("Olympic edition")')
+    ),
+    OptionsLink(
+        match={'author': 'Ali Gholipour et al., CRL', 'labels': 'parcellation'},
+        options=ChrisViewerFileOptions(name='Regional cortex parcellation of the CRL fetal brain atlas.')
+    ),
+]
+
+if __name__ == '__main__':
+    # serialize the example options with pydantic and write the decoded result to stdout
+    print(TypeAdapter(Sequence[OptionsLink]).dump_json(FETAL_ATLAS_OPTIONS).decode('utf-8'))
diff --git a/tests/test_index.py b/tests/test_index.py
index 5eec4a9..3f6b00a 100644
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -3,7 +3,7 @@
 from pytest_unordered import unordered
 
 from visualdataset.index_nifti_dir import index_nifti_dir
-from tests.examples import FETAL_ATLAS_MATCHERS
+from tests.example_matchers import FETAL_ATLAS_MATCHERS
 from visualdataset.manifest import VisualDatasetFile
 
 
@@ -27,7 +27,7 @@ def test_index_dir(tmp_path: Path):
                 'age': '36',
                 'author': 'Ahmed Serag et al.',
                 'institution': 'Imperial College London',
-                'type': 'mri'
+                'type': 'T2 MRI'
             },
         ),
         VisualDatasetFile(
@@ -36,7 +36,7 @@ def test_index_dir(tmp_path: Path):
                 'age': '37',
                 'author': 'Ali Gholipour et al., CRL',
                 'institution': "Boston Children's Hospital",
-                'type': 'mri'
+                'type': 'T2 MRI'
             }
         ),
         VisualDatasetFile(
@@ -45,7 +45,8 @@ def test_index_dir(tmp_path: Path):
                 'age': '37',
                 'author': 'Ali Gholipour et al., CRL',
                 'institution': "Boston Children's Hospital",
-                'type': 'segmentation'
+                'type': 'labels',
+                'labels': 'parcellation'
             }
         ),
         VisualDatasetFile(
@@ -54,7 +55,8 @@ def test_index_dir(tmp_path: Path):
                 'age': '37',
                 'author': 'Ali Gholipour et al., CRL',
                 'institution': "Boston Children's Hospital",
-                'type': 'segmentation'
+                'type': 'labels',
+                'labels': 'tissue'
             }
         ),
     ]
diff --git a/tests/test_validate.py b/tests/test_validate.py
new file mode 100644
index 0000000..5ef71e0
--- /dev/null
+++ b/tests/test_validate.py
@@ -0,0 +1,85 @@
+import pytest
+from pytest_unordered import unordered
+
+from visualdataset.manifest import OptionsLink, VisualDatasetFile
+from visualdataset.options import ChrisViewerFileOptions, NiivueVolumeSettings
+from visualdataset.validate import dict_is_subset, check_indexed_file_has_options
+
+
+def test_check_indexed_file_has_options_works():
+    """
+    When every important key is supplied exactly once by the matching links, no warnings are produced.
+    """
+    mri_link = OptionsLink(
+        match={'type': 'MRI'},
+        options=ChrisViewerFileOptions(
+            name='Magnetic Resonance Imaging',
+            niivue_defaults=NiivueVolumeSettings(colormap='gray')
+        )
+    )
+    creator_options = ChrisViewerFileOptions(author='Me, who is a person', website='https://example.com')
+    creator_link = OptionsLink(match={'creator': 'me'}, options=creator_options)
+    indexed = VisualDatasetFile(path='iamthe.path', tags={'type': 'MRI', 'creator': 'me'})
+    warnings = check_indexed_file_has_options(indexed, [mri_link, creator_link])
+    assert warnings == []
+
+
+def test_check_indexed_file_has_options_warnings():
+    """
+    Both links match the file, so ``name`` and ``niivue_defaults.opacity`` are each defined twice,
+    whereas ``author`` and ``niivue_defaults.colormap`` are never defined.
+    """
+    by_type = OptionsLink(
+        match={'type': 'MRI'},
+        options=ChrisViewerFileOptions(
+            name='Magnetic Resonance Imaging',
+            niivue_defaults=NiivueVolumeSettings(opacity=0.8)
+        ),
+    )
+    by_creator = OptionsLink(
+        match={'creator': 'me'},
+        options=ChrisViewerFileOptions(name='Made by me', niivue_defaults=NiivueVolumeSettings(opacity=0.5)),
+    )
+    indexed = VisualDatasetFile(path='iamthe.path', tags={'type': 'MRI', 'creator': 'me'})
+    expected = [
+        '`name` was defined 2 times for "iamthe.path"',
+        '`author` is unset for "iamthe.path"',
+        '`niivue_defaults.colormap` is unset for "iamthe.path"',
+        '`niivue_defaults.opacity` was defined 2 times for "iamthe.path"'
+    ]
+    warnings = check_indexed_file_has_options(indexed, [by_type, by_creator])
+    assert warnings == unordered(expected)
+
+
+@pytest.mark.parametrize(
+    'a, b, expected',
+    [
+        (  # two empty mappings
+            {},
+            {},
+            True
+        ),
+        (  # identical single pair
+            {'a': 'b'},
+            {'a': 'b'},
+            True
+        ),
+        (  # same key, different value
+            {'a': 'b'},
+            {'a': 'c'},
+            False
+        ),
+        (  # every pair of a is in b, and b has one extra pair
+            {'a': 'b', 'c': 'd'},
+            {'a': 'b', 'c': 'd', 'e': 'f'},
+            True
+        ),
+        (  # a has a pair which b lacks
+            {'a': 'b', 'c': 'd', 'e': 'f'},
+            {'a': 'b', 'c': 'd'},
+            False
+        ),
+    ]
+)
+def test_dict_is_subset(a: dict[str, str], b: dict[str, str], expected: bool):
+    assert dict_is_subset(a, b) == expected
diff --git a/visualdataset/nifti_dataset.py b/visualdataset/nifti_dataset.py
index 06c94e5..2b70d38 100644
--- a/visualdataset/nifti_dataset.py
+++ b/visualdataset/nifti_dataset.py
@@ -8,6 +8,7 @@
 from visualdataset.index_nifti_dir import index_nifti_dir
 from visualdataset.manifest import VisualDatasetFile, OptionsLink, VisualDatasetManifest
 from visualdataset.nifti_sidecar import create_sidecar
+from visualdataset.validate import check_indexed_file_has_options
 
 
 def nifti_dataset(
@@ -25,6 +26,10 @@ def nifti_dataset(
         print(f'Error: nothing matched for: {[m.regex for m in matchers]}')
         sys.exit(1)
 
+    for file in index:
+        for warning_message in check_indexed_file_has_options(file, options):
+            print(warning_message)
+
     first_run_index_nums = find_first_run_files(input_dir, index, first_run_files)
 
     with tqdm(index, desc='Writing outputs') as pbar:
diff --git a/visualdataset/validate.py b/visualdataset/validate.py
new file mode 100644
index 0000000..ec12938
--- /dev/null
+++ b/visualdataset/validate.py
@@ -0,0 +1,56 @@
+from collections import Counter
+from typing import Iterable, Sequence, Iterator, Mapping
+
+from visualdataset.manifest import VisualDatasetFile, OptionsLink
+from visualdataset.options import ChrisViewerFileOptions, NiivueVolumeSettings
+
+IMPORTANT_KEYS = ('name', 'author', 'niivue_defaults.colormap')  # dotted name = setting inside niivue_defaults
+"""
+Keys of ``ChrisViewerFileOptions`` which are important. If a file lacks these options,
+then warnings should be printed.
+"""
+
+
+def check_indexed_file_has_options(file: VisualDatasetFile, options: Sequence[OptionsLink]) -> Sequence[str]:
+    """
+    Produce warning messages about the options which apply to ``file``.
+
+    A link applies when its ``match`` is a subset of the file's tags. Keys defined by more than
+    one applicable link are reported first, followed by keys of ``IMPORTANT_KEYS`` defined by none.
+    """
+    tally = _count_option_keys([link.options for link in options if dict_is_subset(link.match, file.tags)])
+    warnings = [f'`{key}` was defined {n} times for "{file.path}"' for key, n in tally.items() if n > 1]
+    for key, n in tally.items():
+        if n == 0 and key in IMPORTANT_KEYS:
+            warnings.append(f'`{key}` is unset for "{file.path}"')
+    return warnings
+
+
+def dict_is_subset(a: Mapping[str, str], b: Mapping[str, str]) -> bool:
+    """
+    :return: True when ``b`` holds every key of ``a`` with an equal value (so always True for empty ``a``)
+    """
+    return not any(key not in b or b[key] != wanted for key, wanted in a.items())
+
+
+def _count_option_keys(matched_options: Sequence[ChrisViewerFileOptions]):
+    """
+    Tally how often each top-level option key and each ``niivue_defaults.*`` setting occurs
+    in ``matched_options``. Keys which never occur keep the zero count they start with.
+    """
+    tally = _create_counter()
+    for entry in matched_options:
+        # the nested mapping is tallied per-setting below, never as a key of its own
+        tally.update(name for name in entry.keys() if name != 'niivue_defaults')
+        if 'niivue_defaults' not in entry:
+            continue
+        nested = entry['niivue_defaults']
+        tally.update(f'niivue_defaults.{name}' for name in nested.keys())
+    return tally
+
+
+def _create_counter():
+    """Build a Counter which starts every known option key and ``niivue_defaults.*`` setting at 0."""
+    top_level = dict.fromkeys(ChrisViewerFileOptions.__annotations__, 0)
+    top_level.pop('niivue_defaults')
+    return Counter(top_level, **{f'niivue_defaults.{k}': 0 for k in NiivueVolumeSettings.__annotations__})

From 9459347b715b6bc0c1ee8dc0f9015bea9e4e740d Mon Sep 17 00:00:00 2001
From: Jennings Zhang <jenni_zh@protonmail.com>
Date: Sun, 4 Feb 2024 15:35:41 -0500
Subject: [PATCH 6/6] Add arg --first-run-tags

---
 visualdataset/__main__.py      | 10 +++++++---
 visualdataset/nifti_dataset.py |  9 ++++++++-
 visualdataset/validate.py      |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/visualdataset/__main__.py b/visualdataset/__main__.py
index b118b01..08df7df 100644
--- a/visualdataset/__main__.py
+++ b/visualdataset/__main__.py
@@ -19,13 +19,16 @@
                     help='Metadata to go with tag sets')
 parser.add_argument('-s', '--string-args', action='store_true',
                     help='Interpret --matchers and --options as data instead of paths')
-parser.add_argument('--first-run-files', type=str,
+parser.add_argument('--first-run-files', type=str, default='[]',
                     help='List of files to show on first run, '
                          'as a stringified JSON list of paths relative to inputdir')
+parser.add_argument('--first-run-tags', type=str, default='{}',
+                    help='Tags to show on first run as a stringified JSON object')
 parser.add_argument('--readme', type=str,
                     help='README file content')
 
 _LIST_ADAPTER = TypeAdapter(list[str])
+_DICT_ADAPTER = TypeAdapter(dict[str, str])
 
 
 @chris_plugin(
@@ -38,9 +41,10 @@
 def main(options: Namespace, inputdir: Path, outputdir: Path):
     matchers, tag_options = parse_args(options.matchers, options.options,
                                        None if options.string_args else inputdir)
-    first_run_files = [] if options.first_run_files is None else _LIST_ADAPTER.validate_json(options.first_run_files)
+    first_run_files = _LIST_ADAPTER.validate_json(options.first_run_files)
+    first_run_tags = _DICT_ADAPTER.validate_json(options.first_run_tags)
     print(DISPLAY_TITLE, flush=True)
-    nifti_dataset(inputdir, outputdir, matchers, tag_options, first_run_files, options.readme)
+    nifti_dataset(inputdir, outputdir, matchers, tag_options, first_run_files, first_run_tags, options.readme)
 
 
 if __name__ == '__main__':
diff --git a/visualdataset/nifti_dataset.py b/visualdataset/nifti_dataset.py
index 2b70d38..36065dd 100644
--- a/visualdataset/nifti_dataset.py
+++ b/visualdataset/nifti_dataset.py
@@ -8,7 +8,7 @@
 from visualdataset.index_nifti_dir import index_nifti_dir
 from visualdataset.manifest import VisualDatasetFile, OptionsLink, VisualDatasetManifest
 from visualdataset.nifti_sidecar import create_sidecar
-from visualdataset.validate import check_indexed_file_has_options
+from visualdataset.validate import check_indexed_file_has_options, dict_is_subset
 
 
 def nifti_dataset(
@@ -17,6 +17,7 @@ def nifti_dataset(
         matchers: Sequence[Matcher],
         options: Sequence[OptionsLink],
         first_run_files: Sequence[str],
+        first_run_tags: Mapping[str, str],
         readme: Optional[str]
 ):
     with tqdm(desc='Scanning input directory...'):
@@ -31,6 +32,12 @@ def nifti_dataset(
             print(warning_message)
 
     first_run_index_nums = find_first_run_files(input_dir, index, first_run_files)
+    first_run_file_index = (index[i] for i in first_run_index_nums)
+    first_run_known_tags = (file.tags for file in first_run_file_index)
+    if not all(dict_is_subset(first_run_tags, tags) for tags in first_run_known_tags):
+        print('Error: value for --first-run-tags is not a subset of every matched tag '
+              'for the files of --first-run-files')
+        sys.exit(1)
 
     with tqdm(index, desc='Writing outputs') as pbar:
         for file in pbar:
diff --git a/visualdataset/validate.py b/visualdataset/validate.py
index ec12938..3582aa1 100644
--- a/visualdataset/validate.py
+++ b/visualdataset/validate.py
@@ -1,5 +1,5 @@
 from collections import Counter
-from typing import Iterable, Sequence, Iterator, Mapping
+from typing import Sequence, Mapping
 
 from visualdataset.manifest import VisualDatasetFile, OptionsLink
 from visualdataset.options import ChrisViewerFileOptions, NiivueVolumeSettings