Skip to content

Commit

Permalink
bump dataflow config, pin gcsfs, only storetozarr
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen committed Nov 19, 2024
1 parent eb9bd01 commit bcf0561
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 17 deletions.
4 changes: 2 additions & 2 deletions configs/config_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
c.Bake.bakery_class = "pangeo_forge_runner.bakery.dataflow.DataflowBakery"
c.Bake.container_image = "quay.io/leap-stc/rclone-beam:2024.09.24"
c.DataflowBakery.use_dataflow_prime = False
c.DataflowBakery.machine_type = "n2d-highmem-8"
c.DataflowBakery.max_num_workers = 10
c.DataflowBakery.machine_type = "n2d-highmem-2"
c.DataflowBakery.max_num_workers = 30
c.DataflowBakery.use_public_ips = True
c.DataflowBakery.service_account_email = (
"[email protected]"
Expand Down
22 changes: 8 additions & 14 deletions feedstock/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,13 @@
import os
import apache_beam as beam
from leap_data_management_utils.data_management_transforms import (
CopyRclone,
InjectAttrs,
get_catalog_store_urls,
)
from pangeo_forge_recipes.patterns import pattern_from_file_sequence
from pangeo_forge_recipes.transforms import (
OpenURLWithFSSpec,
OpenWithXarray,
StoreToZarr,
ConsolidateMetadata,
ConsolidateDimensionCoordinates,
)

# parse the catalog store locations (this is where the data is copied to after a successful write, and maybe testing)
Expand Down Expand Up @@ -51,14 +47,12 @@
# Can we inject this in the same way as the root?
# Maybe it's better to find another way and avoid injections entirely...
combine_dims=pattern_a.combine_dim_keys,
target_chunks={'time':200, 'latitude':200, 'longitude':720},
target_chunks={"time": 200, "latitude": 200, "longitude": 720},
)
| InjectAttrs()
| ConsolidateDimensionCoordinates()
| ConsolidateMetadata()
| CopyRclone(
target=catalog_store_urls["chirps-global-daily"].replace(
"https://nyu1.osn.mghpcc.org/", ""))
)


# | InjectAttrs()
# | ConsolidateDimensionCoordinates()
# | ConsolidateMetadata()
# | CopyRclone(
# target=catalog_store_urls["chirps-global-daily"].replace(
# "https://nyu1.osn.mghpcc.org/", ""))
)
2 changes: 1 addition & 1 deletion feedstock/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pangeo-forge-recipes==0.10.7
apache-beam[gcp]==2.58.0
gcsfs
gcsfs==2024.9.0
leap-data-management-utils[pangeo-forge] @ git+https://github.com/leap-stc/leap-data-management-utils.git@rclone-copy-stage
# leap-data-management-utils==0.0.12
pyopenssl >= 23.2.0

0 comments on commit bcf0561

Please sign in to comment.