Skip to content

Commit

Permalink
Update for ComStock compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
nweires committed Mar 21, 2024
1 parent 213bb27 commit 385516b
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 38 deletions.
2 changes: 1 addition & 1 deletion buildstockbatch/aws/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region):
weather_dir = sim_dir / "weather"
os.makedirs(weather_dir, exist_ok=True)

epws_to_download = docker_base.determine_epws_needed_for_job(sim_dir, jobs_d)
epws_to_download = docker_base.determine_weather_files_needed_for_job(sim_dir, jobs_d)

# Download the weather files (.epw, .stat, .ddy) needed for these simulations
for epw_filename in epws_to_download:
Expand Down
63 changes: 36 additions & 27 deletions buildstockbatch/cloud/docker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from dataclasses import dataclass
import docker
from fsspec.implementations.local import LocalFileSystem
import glob
import gzip
import itertools
from joblib import Parallel, delayed
Expand All @@ -36,18 +35,22 @@
logger = logging.getLogger(__name__)


def determine_epws_needed_for_job(sim_dir, jobs_d):
def determine_weather_files_needed_for_job(sim_dir, jobs_d):
"""
Gets the list of filenames for the weather data required for a job of simulations.
:param sim_dir: Path to the directory where job files are stored
:param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job.
:returns: Set of epw filenames needed for this job of simulations.
:returns: Set of weather filenames needed for this job of simulations.
"""
# Fetch the mapping for building to weather file from options_lookup.tsv
epws_by_option, param_name = _epws_by_option(sim_dir / "lib" / "resources" / "options_lookup.tsv")

# ComStock requires these empty files to exist.
files_to_download = set(["empty.epw", "empty.stat", "empty.ddy"])

# Look through the buildstock.csv to find the appropriate location and epw
epws_to_download = set()
building_ids = [x[0] for x in jobs_d["batch"]]
with open(
sim_dir / "lib" / "housing_characteristics" / "buildstock.csv",
Expand All @@ -57,9 +60,11 @@ def determine_epws_needed_for_job(sim_dir, jobs_d):
csv_reader = csv.DictReader(f)
for row in csv_reader:
if int(row["Building"]) in building_ids:
epws_to_download.add(epws_by_option[row[param_name]])
epw_file = epws_by_option[row[param_name]]
root, _ = os.path.splitext(epw_file)
files_to_download.update((f"{root}.epw", f"{root}.stat", f"{root}.ddy"))

return epws_to_download
return files_to_download


def _epws_by_option(options_lookup_path):
Expand Down Expand Up @@ -164,14 +169,14 @@ def run_batch(self):
"""
with tempfile.TemporaryDirectory(prefix="bsb_") as tmpdir:
tmppath = pathlib.Path(tmpdir)
epws_to_copy, batch_info = self._run_batch_prep(tmppath)
weather_files_to_copy, batch_info = self._run_batch_prep(tmppath)

# Copy all the files to cloud storage
logger.info("Uploading files for batch...")
self.upload_batch_files_to_cloud(tmppath)

logger.info("Copying duplicate weather files...")
self.copy_files_at_cloud(epws_to_copy)
self.copy_files_at_cloud(weather_files_to_copy)

self.start_batch_job(batch_info)

Expand Down Expand Up @@ -207,16 +212,16 @@ def _run_batch_prep(self, tmppath):

# Collect simulations to queue (along with the weather files those sims need)
logger.info("Preparing simulation batch jobs...")
batch_info, epws_needed = self._prep_jobs_for_batch(tmppath)
batch_info, files_needed = self._prep_jobs_for_batch(tmppath)

# Weather files
logger.info("Prepping weather files...")
epws_to_copy = self._prep_weather_files_for_batch(tmppath, epws_needed)
epws_to_copy = self._prep_weather_files_for_batch(tmppath, files_needed)

return (epws_to_copy, batch_info)

def _prep_weather_files_for_batch(self, tmppath, epws_needed_set):
"""Prepare the weather files (EPWs) needed by the batch.
def _prep_weather_files_for_batch(self, tmppath, weather_files_needed_set):
"""Prepare the weather files needed by the batch.
1. Downloads, if necessary, and extracts weather files to ``self._weather_dir``.
2. Ensures that all weather files needed by the batch are present.
Expand All @@ -229,7 +234,7 @@ def _prep_weather_files_for_batch(self, tmppath, epws_needed_set):
:param tmppath: Unique weather files (compressed) will be copied into a 'weather' subdir
of this path.
:param epws_needed_set: A set of weather filenames needed by the batch.
:param weather_files_needed_set: A set of weather filenames needed by the batch.
:returns: an array of tuples where the first value is the filename of a file that will be
uploaded to cloud storage (because it's in the ``tmppath``), and the second value is the
Expand All @@ -242,12 +247,12 @@ def _prep_weather_files_for_batch(self, tmppath, epws_needed_set):
# Downloads, if necessary, and extracts weather files to ``self._weather_dir``
self._get_weather_files()

# Ensure all needed EPWs are present
# Ensure all needed weather files are present
logger.info("Ensuring all needed weather files are present...")
epw_files = set(map(lambda x: x.split("/")[-1], glob.glob(f"{self.weather_dir}/*.epw")))
weather_files = os.listdir(self.weather_dir)
missing_epws = set()
for needed_epw in epws_needed_set:
if needed_epw not in epw_files:
for needed_epw in weather_files_needed_set:
if needed_epw not in weather_files:
missing_epws.add(needed_epw)
if missing_epws:
raise ValidationError(
Expand All @@ -258,7 +263,7 @@ def _prep_weather_files_for_batch(self, tmppath, epws_needed_set):

# Determine the unique weather files
logger.info("Calculating hashes for weather files")
epw_filenames = list(epws_needed_set)
epw_filenames = list(weather_files_needed_set)
epw_hashes = Parallel(n_jobs=-1, verbose=9)(
delayed(calc_hash_for_file)(pathlib.Path(self.weather_dir) / epw_filename)
for epw_filename in epw_filenames
Expand Down Expand Up @@ -298,7 +303,7 @@ def _prep_weather_files_for_batch(self, tmppath, epws_needed_set):
dupe_count += count - 1
dupe_bytes += bytes * (count - 1)
logger.info(
f"Weather files: {len(epws_needed_set):,}/{len(epw_files):,} referenced; "
f"Weather files: {len(weather_files_needed_set):,}/{len(weather_files):,} referenced; "
f"{len(unique_epws):,} unique ({(upload_bytes / 1024 / 1024):,.1f} MiB to upload), "
f"{dupe_count:,} duplicates ({(dupe_bytes / 1024 / 1024):,.1f} MiB saved from uploading)"
)
Expand Down Expand Up @@ -333,7 +338,7 @@ def _prep_jobs_for_batch(self, tmppath):

# Ensure all weather files are available
logger.debug("Determining which weather files are needed...")
epws_needed = self._determine_epws_needed_for_batch(df)
files_needed = self._determine_weather_files_needed_for_batch(df)

# Write each batch of simulations to a file.
logger.info("Queueing jobs")
Expand Down Expand Up @@ -384,9 +389,12 @@ def _prep_jobs_for_batch(self, tmppath):
"lib/housing_characteristics",
)

return DockerBatchBase.BatchInfo(n_sims=n_sims, n_sims_per_job=n_sims_per_job, job_count=job_count), epws_needed
return (
DockerBatchBase.BatchInfo(n_sims=n_sims, n_sims_per_job=n_sims_per_job, job_count=job_count),
files_needed,
)

def _determine_epws_needed_for_batch(self, buildstock_df):
def _determine_weather_files_needed_for_batch(self, buildstock_df):
"""
Gets the set of weather filenames (.epw, .stat, .ddy) required for a batch of simulations.
:param buildstock_df: DataFrame of the buildstock batch being simulated.
Expand All @@ -398,7 +406,7 @@ def _determine_epws_needed_for_batch(self, buildstock_df):
)

# Iterate over all values in the `param_name` column and collect the referenced EPWs
epws_needed = set()
files_needed = set(["empty.epw", "empty.stat", "empty.ddy"])
for lookup_value in buildstock_df[param_name]:
if not lookup_value:
raise ValidationError(
Expand All @@ -409,11 +417,12 @@ def _determine_epws_needed_for_batch(self, buildstock_df):
if not epw_path:
raise ValidationError(f"Did not find an EPW for '{lookup_value}'")

# Add just the filename (without relative path)
epws_needed.add(epw_path.split("/")[-1])
# Add just the filenames (without relative path)
root, _ = os.path.splitext(os.path.basename(epw_path))
files_needed.update((f"{root}.epw", f"{root}.stat", f"{root}.ddy"))

logger.debug(f"Unique EPWs needed for this buildstock: {len(epws_needed):,}")
return epws_needed
logger.debug(f"Unique weather files needed for this buildstock: {len(files_needed):,}")
return files_needed

@classmethod
def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, output_path):
Expand Down
31 changes: 21 additions & 10 deletions buildstockbatch/test/test_docker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,22 @@ def test_run_batch_prep(basic_residential_project_file, mocker):

with tempfile.TemporaryDirectory(prefix="bsb_") as tmpdir:
tmppath = pathlib.Path(tmpdir)
epws_to_copy, batch_info = dbb._run_batch_prep(tmppath)
files_to_copy, batch_info = dbb._run_batch_prep(tmppath)
sampler_mock.run_sampling.assert_called_once()

# There are three weather files...
# There are three sets of weather files...
# * "G2500210.epw" is unique; check for it (gz'd) in tmppath
# * "G2601210.epw" and "G2601390.epw" are dupes. One should be in
# tmppath; one should be copied to the other according to ``epws_to_copy``
# tmppath; one should be copied to the other according to ``files_to_copy``
# Same for the .ddy and .stat files.
assert os.path.isfile(tmppath / "weather" / "G2500210.epw.gz")
assert os.path.isfile(tmppath / "weather" / "G2601210.epw.gz") or os.path.isfile(
tmppath / "weather" / "G2601390.epw.gz"
)
src, dest = epws_to_copy[0]
assert src in ("G2601210.epw.gz", "G2601390.epw.gz")
assert dest in ("G2601210.epw.gz", "G2601390.epw.gz")
assert src != dest
assert ("G2601210.epw.gz", "G2601390.epw.gz") in files_to_copy or (
"G2601390.epw.gz",
"G2601210.epw.gz",
) in files_to_copy

# Three job files should be created, with 10 total simulations, split
# into batches of 4, 4, and 2 simulations.
Expand Down Expand Up @@ -79,7 +80,7 @@ def test_run_batch_prep(basic_residential_project_file, mocker):
assert [building, 0] in simulations


def test_get_epws_to_download():
def test_get_weather_files_to_download():
resources_dir_path = pathlib.Path(resources_dir)
options_file = resources_dir_path / "options_lookup.tsv"
buildstock_file = resources_dir_path / "buildstock_good.csv"
Expand All @@ -100,8 +101,18 @@ def test_get_epws_to_download():
],
}

epws = docker_base.determine_epws_needed_for_job(sim_dir, jobs_d)
assert epws == {"weather/G2500210.epw", "weather/G2601390.epw"}
files = docker_base.determine_weather_files_needed_for_job(sim_dir, jobs_d)
assert files == {
"empty.epw",
"empty.stat",
"empty.ddy",
"weather/G2500210.epw",
"weather/G2601390.epw",
"weather/G2500210.ddy",
"weather/G2601390.ddy",
"weather/G2500210.stat",
"weather/G2601390.stat",
}


def test_run_simulations(basic_residential_project_file):
Expand Down
Binary file not shown.

0 comments on commit 385516b

Please sign in to comment.