Skip to content

Commit

Permalink
Add a fibad top-level method to get the dimensions of downloaded files
Browse files Browse the repository at this point in the history
- Rects in downloadCutout.py now track dimension of the file
- Downloader now is a proper object and holds more of its rect
  bookkeeping as instance state.
- Downloader creation is lightweight, and most work is still in .run()
- Example downloader notebook has been updated to use the new interface
  to generate histograms of downloaded data.
  • Loading branch information
mtauraso committed Sep 17, 2024
1 parent 3ee8380 commit 324fe8b
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 61 deletions.
31 changes: 30 additions & 1 deletion example_notebooks/GettingStartedDownloader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,39 @@
"metadata": {},
"outputs": [],
"source": [
"fibad_instance = fibad.Fibad(config_file=fibad_config)\n",
"import fibad\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"# os.chdir(Path(fibad.__file__).parent/\"..\"/\"..\")\n",
"fibad_instance = fibad.Fibad(config_file=\"fibad_config.toml\")\n",
"\n",
"fibad_instance.download()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"widths, heights = fibad_instance.raw_data_dimensions()\n",
"\n",
"fig, axs = plt.subplots(1, 2)\n",
"fig.set_figwidth(12)\n",
"\n",
"_, _, _ = axs[0].hist(heights, range=(260, 270), bins=10)\n",
"_, _, _ = axs[1].hist(widths, range=(260, 270), bins=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ examples = [
dev = [
"asv==0.6.4", # Used to compute performance benchmarks
"jupyter", # Clears output from Jupyter notebooks
"matplotlib", # For example notebooks
"pre-commit", # Used to run checks before finalizing a git commit
"pytest",
"pytest-cov", # Used to report total code coverage
Expand Down
126 changes: 68 additions & 58 deletions src/fibad/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import logging
from pathlib import Path
from threading import Thread
from typing import Optional
from typing import Optional, Union

Check warning on line 5 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L5

Added line #L5 was not covered by tests

from astropy.io import fits

Check warning on line 7 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L7

Added line #L7 was not covered by tests
from astropy.table import Table, hstack

import fibad.downloadCutout.downloadCutout as dC
Expand All @@ -21,12 +22,16 @@ class Downloader:
VARIABLE_FIELDS = ["tract", "ra", "dec"]

# These are the column names we retain when writing a rect out to the manifest.fits file
RECT_COLUMN_NAMES = VARIABLE_FIELDS + ["filter", "sw", "sh", "rerun", "type"]
RECT_COLUMN_NAMES = VARIABLE_FIELDS + ["filter", "sw", "sh", "rerun", "type", "dim"]

Check warning on line 25 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L25

Added line #L25 was not covered by tests

MANIFEST_FILE_NAME = "manifest.fits"

@staticmethod
def run(config):
def __init__(self, config):
    """Create a downloader from the runtime configuration.

    Construction only records configuration and derived paths; no catalog
    is read and nothing is downloaded here.

    Parameters
    ----------
    config : dict
        Runtime configuration as a nested dictionary. Only its "download"
        table is kept (an empty dict when that table is absent).
    """
    download_section = config.get("download", {})
    self.config = download_section

    # Directory cutouts are written to, resolved to an absolute path.
    cutout_dir = Path(download_section.get("cutout_dir"))
    self.cutout_path = cutout_dir.resolve()

    # The manifest file lives alongside the cutouts.
    self.manifest_file = self.cutout_path / Downloader.MANIFEST_FILE_NAME

Check warning on line 32 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L29-L32

Added lines #L29 - L32 were not covered by tests

def run(self):

Check warning on line 34 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L34

Added line #L34 was not covered by tests
"""
Main entrypoint for downloading cutouts from HSC for use with fibad
Expand All @@ -36,42 +41,39 @@ def run(config):
Runtime configuration as a nested dictionary
"""

config = config.get("download", {})

logger.info("Download command Start")

fits_file = Path(config.get("fits_file", "")).resolve()
fits_file = Path(self.config.get("fits_file", "")).resolve()

Check warning on line 46 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L46

Added line #L46 was not covered by tests
logger.info(f"Reading in fits catalog: {fits_file}")
# Filter the fits file for the fields we want
column_names = ["object_id"] + Downloader.VARIABLE_FIELDS
locations = Downloader.filterfits(fits_file, column_names)

# If offset/length specified, filter to that length
offset = config.get("offset", 0)
end = offset + config.get("num_sources", None)
offset = self.config.get("offset", 0)
end = offset + self.config.get("num_sources", None)

Check warning on line 54 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L53-L54

Added lines #L53 - L54 were not covered by tests
if end is not None:
locations = locations[offset:end]

cutout_path = Path(config.get("cutout_dir")).resolve()
logger.info(f"Downloading cutouts to {cutout_path}")
logger.info(f"Downloading cutouts to {self.cutout_path}")

Check warning on line 58 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L58

Added line #L58 was not covered by tests

logger.info("Making a list of cutouts...")
# Make a list of rects to pass to downloadCutout
rects = Downloader.create_rects(
locations, offset=0, default=Downloader.rect_from_config(config), path=cutout_path
self.rects = Downloader.create_rects(

Check warning on line 62 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L62

Added line #L62 was not covered by tests
locations, offset=0, default=Downloader.rect_from_config(self.config), path=self.cutout_path
)

logger.info("Checking the list against currently downloaded cutouts...")
# Prune any previously downloaded rects from our list using the manifest from the previous download
rects = Downloader._prune_downloaded_rects(cutout_path, rects)
self.rects = self._prune_downloaded_rects()

Check warning on line 68 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L68

Added line #L68 was not covered by tests

# Early return if there is nothing to download.
if len(rects) == 0:
if len(self.rects) == 0:

Check warning on line 71 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L71

Added line #L71 was not covered by tests
logger.info("Download already complete according to manifest.")
return

# Create thread objects for each of our worker threads
num_threads = config.get("concurrent_connections", 2)
num_threads = self.config.get("concurrent_connections", 2)

Check warning on line 76 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L76

Added line #L76 was not covered by tests
if num_threads > 5:
raise RuntimeError("This client only opens 5 connections or fewer.")

Expand All @@ -89,22 +91,26 @@ def _batched(iterable, n):
yield batch

logger.info("Dividing cutouts among threads...")
thread_rects = list(_batched(rects, int(len(rects) / num_threads))) if num_threads != 1 else [rects]
thread_rects = (

Check warning on line 94 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L94

Added line #L94 was not covered by tests
list(_batched(self.rects, int(len(self.rects) / num_threads)))
if num_threads != 1
else [self.rects]
)

# Empty dictionaries for the threads to create download manifests in
thread_manifests = [dict() for _ in range(num_threads)]
self.thread_manifests = [dict() for _ in range(num_threads)]

Check warning on line 101 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L101

Added line #L101 was not covered by tests

shared_thread_args = (
config["username"],
config["password"],
DownloadStats(print_interval_s=config.get("stats_print_interval", 60)),
self.config["username"],
self.config["password"],
DownloadStats(print_interval_s=self.config.get("stats_print_interval", 60)),
)

shared_thread_kwargs = {
"retrywait": config.get("retry_wait", 30),
"retries": config.get("retries", 3),
"timeout": config.get("timeout", 3600),
"chunksize": config.get("chunk_size", 990),
"retrywait": self.config.get("retry_wait", 30),
"retries": self.config.get("retries", 3),
"timeout": self.config.get("timeout", 3600),
"chunksize": self.config.get("chunk_size", 990),
}

download_threads = [
Expand All @@ -114,7 +120,7 @@ def _batched(iterable, n):
daemon=True, # daemon so these threads will die when the main thread is interrupted
args=(thread_rects[i],) # rects
+ shared_thread_args # username, password, download stats
+ (i, thread_manifests[i]), # thread_num, manifest
+ (i, self.thread_manifests[i]), # thread_num, manifest
kwargs=shared_thread_kwargs,
)
for i in range(num_threads)
Expand All @@ -125,12 +131,11 @@ def _batched(iterable, n):
[thread.start() for thread in download_threads]
[thread.join() for thread in download_threads]
finally: # Ensure manifest is written even when we get a KeyboardInterrupt during download
Downloader.write_manifest(thread_manifests, cutout_path)
self._write_manifest()

Check warning on line 134 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L134

Added line #L134 was not covered by tests

logger.info("Done")

@staticmethod
def _prune_downloaded_rects(cutout_path: Path, rects: list[dC.Rect]) -> list[dC.Rect]:
def _prune_downloaded_rects(self):

Check warning on line 138 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L138

Added line #L138 was not covered by tests
"""Prunes already downloaded rects using the manifest in `cutout_path`. `rects` passed in is
mutated by this operation
Expand All @@ -155,13 +160,13 @@ def _prune_downloaded_rects(cutout_path: Path, rects: list[dC.Rect]) -> list[dC.
"""
# print(rects)
# Read in any prior manifest.
prior_manifest = Downloader.read_manifest(cutout_path)
prior_manifest = self.manifest_to_rects()

Check warning on line 163 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L163

Added line #L163 was not covered by tests

# If we found a manifest, we are resuming a download
if len(prior_manifest) != 0:
# Filter rects to figure out which ones are completely downloaded.
# This operation consumes prior_manifest in the process
rects[:] = [rect for rect in rects if Downloader._keep_rect(rect, prior_manifest)]
self.rects[:] = [rect for rect in self.rects if Downloader._keep_rect(rect, prior_manifest)]

Check warning on line 169 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L169

Added line #L169 was not covered by tests

# if prior_manifest was not completely consumed, then the earlier download attempted
# some sky locations which would not be included in the current download, and we have
Expand All @@ -170,12 +175,12 @@ def _prune_downloaded_rects(cutout_path: Path, rects: list[dC.Rect]) -> list[dC.
# print(len(prior_manifest))
# print (prior_manifest)
raise RuntimeError(
f"""{cutout_path/Downloader.MANIFEST_FILE_NAME} describes a download with
f"""{self.manifest_file} describes a download with
sky locations that would not be downloaded in the download currently being attempted. Are you sure you are
resuming the correct download? Deleting the manifest and cutout files will start the download from scratch"""
)

return rects
return self.rects

Check warning on line 183 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L183

Added line #L183 was not covered by tests

@staticmethod
def _keep_rect(location_rect: dC.Rect, prior_manifest: dict[dC.Rect, str]) -> bool:
Expand Down Expand Up @@ -217,8 +222,7 @@ def _keep_rect(location_rect: dC.Rect, prior_manifest: dict[dC.Rect, str]) -> bo

return keep_rect

@staticmethod
def write_manifest(thread_manifests: list[dict[dC.Rect, str]], file_path: Path):
def _write_manifest(self):

Check warning on line 225 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L225

Added line #L225 was not covered by tests
"""Write out manifest fits file that is an inventory of the download.
The manifest fits file should have columns object_id, ra, dec, tract, filter, filename
Expand Down Expand Up @@ -246,24 +250,17 @@ def write_manifest(thread_manifests: list[dict[dC.Rect, str]], file_path: Path):
sh: Semi-height of the cutout box in degrees
rerun: The data release in use e.g. pdr3_wide
type: coadd, warp, or other values allowed by the HSC docs
dim: Tuple of integers with the dimensions of the image.
Parameters
----------
thread_manifests : list[dict[dC.Rect,str]]
Manifests mapping rects -> Filename or status message. Each manifest came from a separate thread.
file_path : Path
Full path to the location where the manifest file ought be written. The manifest file will be
named manifest.fits
"""
logger.info("Assembling download manifest")
# Start building a combined manifest from all threads from the ground truth of the prior manifest
# in this directory, which we will be overwriting.
combined_manifest = Downloader.read_manifest(file_path)
combined_manifest = self.manifest_to_rects()

Check warning on line 259 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L259

Added line #L259 was not covered by tests

# Combine all thread manifests with the prior manifest, so that the current status of a downloaded
# rect overwrites any status from the prior run (which is no longer relevant.)
for manifest in thread_manifests:
for manifest in self.thread_manifests:

Check warning on line 263 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L263

Added line #L263 was not covered by tests
combined_manifest.update(manifest)

logger.info(f"Writing out download manifest with {len(combined_manifest)} entries.")
Expand Down Expand Up @@ -293,38 +290,50 @@ def write_manifest(thread_manifests: list[dict[dC.Rect, str]], file_path: Path):
# print (key, len(val), val)

manifest_table = Table(columns)
manifest_table.write(file_path / Downloader.MANIFEST_FILE_NAME, overwrite=True, format="fits")
manifest_table.write(self.manifest_file, overwrite=True, format="fits")

Check warning on line 293 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L293

Added line #L293 was not covered by tests

logger.info("Finished writing download manifest")

@staticmethod
def read_manifest(file_path: Path) -> dict[dC.Rect, str]:
def get_manifest(self):

Check warning on line 297 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L297

Added line #L297 was not covered by tests
"""Get the current downloader manifest, which is a list of files where download has been attempted
The format of the table is outlined in _write_manifest()
Returns
-------
astropy.table.Table
The entire download manifest
"""
if self.manifest_file.exists():
return Table.read(self.manifest_file, format="fits")

Check warning on line 307 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L306-L307

Added lines #L306 - L307 were not covered by tests

return None

Check warning on line 309 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L309

Added line #L309 was not covered by tests

def manifest_to_rects(self) -> dict[dC.Rect, str]:

Check warning on line 311 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L311

Added line #L311 was not covered by tests
"""Read the manifest.fits file from the given directory and return its contents as a dictionary with
downloadCutout.Rectangles as keys and filenames as values.
If no manifest file is found, an empty dict is returned.
Parameters
----------
file_path : Path
Where to find the manifest file
Returns
-------
dict[dC.Rect, str]
A dictionary containing all the rects in the manifest and all the filenames, or empty dict if no
manifest is found.
"""
filename = file_path / Downloader.MANIFEST_FILE_NAME
if filename.exists():
manifest_table = Table.read(filename, format="fits")
manifest_table = self.get_manifest()
if manifest_table is not None:

Check warning on line 324 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L323-L324

Added lines #L323 - L324 were not covered by tests
rects = Downloader.create_rects(
locations=manifest_table, fields=Downloader.RECT_COLUMN_NAMES, path=file_path
locations=manifest_table, fields=Downloader.RECT_COLUMN_NAMES, path=self.cutout_path
)
return {rect: filename for rect, filename in zip(rects, manifest_table["filename"])}
else:
return {}

@staticmethod
def _rect_hook(rect: dC.Rect, filename: Union[Path, str]):
    """Record the dimensions of a downloaded FITS file on its rect.

    Passed as ``rect_hook`` to the cutout download call; presumably invoked
    once per downloaded file -- confirm against downloadCutout.

    Parameters
    ----------
    rect : dC.Rect
        The rect describing the cutout; its ``dim`` attribute is overwritten.
    filename : Union[Path, str]
        Location of the FITS file to inspect.
    """
    with fits.open(filename) as hdu_list:
        # The shape is read from the HDU at index 1, not the primary HDU.
        image_hdu = hdu_list[1]
        rect.dim = image_hdu.shape

Check warning on line 335 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L332-L335

Added lines #L332 - L335 were not covered by tests

@staticmethod
def download_thread(
rects: list[dC.Rect],
Expand Down Expand Up @@ -365,6 +374,7 @@ def download_thread(
password=password,
onmemory=False,
request_hook=stats_hook,
rect_hook=Downloader._rect_hook,
manifest=manifest,
**kwargs,
)
Expand Down Expand Up @@ -459,7 +469,7 @@ def create_rects(
rects = []
fields = fields if fields else Downloader.VARIABLE_FIELDS
for index, location in enumerate(locations):
args = {field: location[field] for field in fields}
args = {field: location.get(field) for field in fields}

Check warning on line 472 in src/fibad/download.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/download.py#L472

Added line #L472 was not covered by tests
args["lineno"] = index + offset
args["tract"] = str(args["tract"])
# Sets the file name on the rect to be the object_id, also includes other rect fields
Expand Down
Loading

0 comments on commit 324fe8b

Please sign in to comment.