From 07ce98c92930b75c8c021c9115601a40246731ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:53:00 +0000 Subject: [PATCH 01/62] Bump cfn-lint from 1.9.5 to 1.9.7 Bumps [cfn-lint](https://github.com/aws-cloudformation/cfn-lint) from 1.9.5 to 1.9.7. - [Release notes](https://github.com/aws-cloudformation/cfn-lint/releases) - [Changelog](https://github.com/aws-cloudformation/cfn-lint/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-cloudformation/cfn-lint/compare/v1.9.5...v1.9.7) --- updated-dependencies: - dependency-name: cfn-lint dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index b9ca49deb..cae9a8e55 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -17,4 +17,4 @@ flake8-blind-except==0.2.1 flake8-builtins==2.5.0 setuptools==72.1.0 openapi-spec-validator==0.7.1 -cfn-lint==1.9.5 +cfn-lint==1.9.7 From 387fdfebf7cb3357d9d9110c07b6774ebd5a9fad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:53:04 +0000 Subject: [PATCH 02/62] Bump pyyaml from 6.0.1 to 6.0.2 Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0.1 to 6.0.2. - [Release notes](https://github.com/yaml/pyyaml/releases) - [Changelog](https://github.com/yaml/pyyaml/blob/main/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/6.0.1...6.0.2) --- updated-dependencies: - dependency-name: pyyaml dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index b9ca49deb..7078816db 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -9,7 +9,7 @@ boto3==1.34.155 jinja2==3.1.4 moto[dynamodb]==5.0.12 pytest==8.3.2 -PyYAML==6.0.1 +PyYAML==6.0.2 responses==0.25.3 flake8==7.1.1 flake8-import-order==0.18.2 From 69cf6e123418f085211d1d35b5c67dffe014f520 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:58:56 +0000 Subject: [PATCH 03/62] Bump boto3 from 1.34.155 to 1.34.159 Bumps [boto3](https://github.com/boto/boto3) from 1.34.155 to 1.34.159. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.34.155...1.34.159) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- requirements-apps-disable-private-dns.txt | 2 +- requirements-apps-start-execution-manager.txt | 2 +- requirements-apps-start-execution-worker.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-all.txt b/requirements-all.txt index 273f16744..11f6b8327 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -5,7 +5,7 @@ -r requirements-apps-start-execution-worker.txt -r requirements-apps-disable-private-dns.txt -r requirements-apps-update-db.txt -boto3==1.34.155 +boto3==1.34.159 jinja2==3.1.4 moto[dynamodb]==5.0.12 pytest==8.3.2 diff --git a/requirements-apps-disable-private-dns.txt b/requirements-apps-disable-private-dns.txt index 48b50adb7..16c7ac7aa 100644 --- a/requirements-apps-disable-private-dns.txt +++ b/requirements-apps-disable-private-dns.txt @@ -1 +1 @@ -boto3==1.34.155 +boto3==1.34.159 diff --git a/requirements-apps-start-execution-manager.txt b/requirements-apps-start-execution-manager.txt index e4644e0ce..00ad69582 100644 --- a/requirements-apps-start-execution-manager.txt +++ b/requirements-apps-start-execution-manager.txt @@ -1,3 +1,3 @@ -boto3==1.34.155 +boto3==1.34.159 ./lib/dynamo/ ./lib/lambda_logging/ diff --git a/requirements-apps-start-execution-worker.txt b/requirements-apps-start-execution-worker.txt index bd5121fdc..aca0bc7ac 100644 --- a/requirements-apps-start-execution-worker.txt +++ b/requirements-apps-start-execution-worker.txt @@ -1,2 +1,2 @@ -boto3==1.34.155 +boto3==1.34.159 ./lib/lambda_logging/ From 38dca680c2547ec0b703debd92c12615b73203b7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 23 Aug 2024 15:07:32 -0500 Subject: [PATCH 04/62] sketching out per-job compute environments --- apps/compute-cf.yml.j2 | 44 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 0cb23e152..2faaa0b2c 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -1,5 +1,9 @@ AWSTemplateFormatVersion: 2010-09-09 +# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-batch-computeenvironment-computeresources.html +# TODO: This is where the instance AMI and Type are specified. To have multiple instance types and amis per deployment, this +# will need to support that. + Parameters: VpcId: @@ -63,18 +67,47 @@ Resources: --==BOUNDARY==-- - ComputeEnvironment: + # TODO: For per-instance instance types and AMIs, we will likely need + # multiple Compute Environments and Job Queues. + # ComputeEnvironment: + # Type: AWS::Batch::ComputeEnvironment + # Properties: + # ServiceRole: !GetAtt BatchServiceRole.Arn + # Type: MANAGED + # ComputeResources: + # Type: SPOT + # AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED + # MinvCpus: 0 + # MaxvCpus: !Ref MaxvCpus + # InstanceTypes: !Ref InstanceTypes + # ImageId: !Ref AmiId + # Subnets: !Ref SubnetIds + # InstanceRole: !Ref InstanceProfile + # SecurityGroupIds: + # - !Ref SecurityGroup + # LaunchTemplate: + # LaunchTemplateId: !Ref LaunchTemplate + # Version: !GetAtt LaunchTemplate.LatestVersionNumber + # Tags: + # Name: !Ref AWS::StackName + + # TODO: Render a compute environment for each job spec. + {% for job_spec in job_specs.items() %} + {{ job_type }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn Type: MANAGED ComputeResources: - Type: SPOT + Type: SPOT # TODO: Type and Allocation Strategy per Instance? AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - MinvCpus: 0 + MinvCpus: 0 # TODO: vCPU count per instance? MaxvCpus: !Ref MaxvCpus - InstanceTypes: !Ref InstanceTypes - ImageId: !Ref AmiId + InstanceTypes: + {% for instance_type in job_spec['instance_types'].items() %} + - {{ instance_type }} + {% endfor %} + ImageId: {{ job_spec['ami_id'] }} Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile SecurityGroupIds: @@ -84,6 +117,7 @@ Resources: Version: !GetAtt LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName + {% endfor %} SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy From fcad9dceda8942f5a22991050f6338115e4b4080 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 23 Aug 2024 15:07:51 -0500 Subject: [PATCH 05/62] notes in important locations for per-instance types and amis --- .github/workflows/deploy-daac.yml | 2 ++ Makefile | 2 ++ apps/main-cf.yml.j2 | 5 +++++ apps/render_cf.py | 5 +++++ job_spec/SRG_GSLC.yml | 6 ++++++ 5 files changed, 20 insertions(+) diff --git a/.github/workflows/deploy-daac.yml b/.github/workflows/deploy-daac.yml index c6af17100..f499b9af3 100644 --- a/.github/workflows/deploy-daac.yml +++ b/.github/workflows/deploy-daac.yml @@ -26,11 +26,13 @@ jobs: cost_profile: EDC deploy_ref: refs/heads/main job_files: job_spec/AUTORIFT.yml job_spec/INSAR_GAMMA.yml job_spec/RTC_GAMMA.yml job_spec/INSAR_ISCE_BURST.yml + # TODO: Instance types will need to be moved to the Job Spec file. instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge default_max_vcpus: 1500 expanded_max_vcpus: 3000 required_surplus: 2000 security_environment: EDC + # TODO: AMIs will need to be moved to the Job Spec file. ami_id: image_id_ecs_amz2 distribution_url: 'https://d3gm2hf49xd6jj.cloudfront.net' diff --git a/Makefile b/Makefile index c97f2c118..b89da8437 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,8 @@ run: render install: python -m pip install -r requirements-all.txt + +# TODO: This is where render is called and provided the job type files files ?= job_spec/*.yml security_environment ?= ASF api_name ?= local diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index bc5ad6135..10c22c4a1 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -51,6 +51,7 @@ Parameters: - false - true + # TODO: Mutliple Instance Type Support AmiId: Type: AWS::SSM::Parameter::Value Default: /aws/service/ecs/optimized-ami/amazon-linux-2023/recommended/image_id @@ -79,6 +80,7 @@ Parameters: MinValue: 0 Default: 0 + # TODO: Mutliple Instance Type Support InstanceTypes: Description: EC2 instance types to include in AWS Batch Compute Environment Type: CommaDelimitedList @@ -136,6 +138,9 @@ Resources: {% endif %} TemplateURL: api/api-cf.yml + # TODO: Mutliple Instance Type Support + # compute-cf.yml needs to support per-job + # instance and ami types. Cluster: Type: AWS::CloudFormation::Stack Properties: diff --git a/apps/render_cf.py b/apps/render_cf.py index 1907d065a..b1a21ed9f 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -67,6 +67,11 @@ def main(): parser.add_argument('-c', '--cost-profile', default='DEFAULT', choices=['DEFAULT', 'EDC']) args = parser.parse_args() + # TODO: Job Types are provided here, by the Makefile. + # The Job Types will need to contain a parsable field + # that contains the information needed for per-instance + # AMIs, types, and resources. + job_types = {} for file in args.job_spec_files: job_types.update(yaml.safe_load(file.read_text())) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index d3afa07c9..54af7a778 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -30,6 +30,7 @@ SRG_GSLC: command: - ++process - back_projection + - --gpu - --bucket - '!Ref Bucket' - --bucket-prefix @@ -38,6 +39,11 @@ SRG_GSLC: timeout: 10800 vcpu: 1 memory: 31500 + # TODO: GPU Resource Requirment is specified here, + # but the instance type and AMI come from + # compute-cf.yml + # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-batch-jobdefinition-resourcerequirement.html + gpu: 1 secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD From 8396463dfdd7a024850e6c2989009a389f4d86b0 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 23 Aug 2024 15:17:21 -0500 Subject: [PATCH 06/62] rendering per-job compute environments --- apps/compute-cf.yml.j2 | 31 ++++------------------------- job_spec/ARIA_AUTORIFT.yml | 10 ++++++++++ job_spec/ARIA_RAIDER.yml | 10 ++++++++++ job_spec/AUTORIFT.yml | 10 ++++++++++ job_spec/AUTORIFT_ITS_LIVE.yml | 10 ++++++++++ job_spec/INSAR_GAMMA.yml | 10 ++++++++++ job_spec/INSAR_ISCE.yml | 10 ++++++++++ job_spec/INSAR_ISCE_BURST.yml | 10 ++++++++++ job_spec/RTC_GAMMA.yml | 10 ++++++++++ job_spec/S1_CORRECTION_ITS_LIVE.yml | 10 ++++++++++ job_spec/SRG_GSLC.yml | 5 +++++ job_spec/WATER_MAP.yml | 10 ++++++++++ job_spec/WATER_MAP_EQ.yml | 10 ++++++++++ 13 files changed, 119 insertions(+), 27 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 2faaa0b2c..0fe1ac281 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -67,32 +67,8 @@ Resources: --==BOUNDARY==-- - # TODO: For per-instance instance types and AMIs, we will likely need - # multiple Compute Environments and Job Queues. - # ComputeEnvironment: - # Type: AWS::Batch::ComputeEnvironment - # Properties: - # ServiceRole: !GetAtt BatchServiceRole.Arn - # Type: MANAGED - # ComputeResources: - # Type: SPOT - # AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - # MinvCpus: 0 - # MaxvCpus: !Ref MaxvCpus - # InstanceTypes: !Ref InstanceTypes - # ImageId: !Ref AmiId - # Subnets: !Ref SubnetIds - # InstanceRole: !Ref InstanceProfile - # SecurityGroupIds: - # - !Ref SecurityGroup - # LaunchTemplate: - # LaunchTemplateId: !Ref LaunchTemplate - # Version: !GetAtt LaunchTemplate.LatestVersionNumber - # Tags: - # Name: !Ref AWS::StackName - - # TODO: Render a compute environment for each job spec. - {% for job_spec in job_specs.items() %} + # TODO: Render a compute environment for each job spec. + {% for job_type, job_spec in job_types.items() %} {{ job_type }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: @@ -104,7 +80,7 @@ Resources: MinvCpus: 0 # TODO: vCPU count per instance? MaxvCpus: !Ref MaxvCpus InstanceTypes: - {% for instance_type in job_spec['instance_types'].items() %} + {% for instance_type in job_spec['instance_types'] %} - {{ instance_type }} {% endfor %} ImageId: {{ job_spec['ami_id'] }} @@ -117,6 +93,7 @@ Resources: Version: !GetAtt LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName + {% endfor %} SchedulingPolicy: diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 369584bb4..821fd3bc3 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -61,3 +61,13 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index a2586092e..fd4e9ae8a 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -52,3 +52,13 @@ ARIA_RAIDER: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 73240557e..7aa25c0ed 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -60,3 +60,13 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index 79e9c6373..f20fb911a 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -74,3 +74,13 @@ AUTORIFT: - EARTHDATA_PASSWORD - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index a4605062d..4bc98fc34 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -119,3 +119,13 @@ INSAR_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 8715ca330..5a54b2889 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -154,3 +154,13 @@ INSAR_ISCE: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 512f7d276..b681ba25f 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -67,3 +67,13 @@ INSAR_ISCE_BURST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index e94002059..a95afa279 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -142,3 +142,13 @@ RTC_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 40995ff13..b4db588a4 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -52,3 +52,13 @@ S1_CORRECTION_TEST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 54af7a778..ac98d9a89 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -47,3 +47,8 @@ SRG_GSLC: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD + instance_types: + - g4dn.xlarge + - g4dn.2xlarge + - g6.xlarge + ami_id: instance_with_gpu_support.aws.com \ No newline at end of file diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 26ee311bf..74264b05d 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -203,3 +203,13 @@ WATER_MAP: timeout: 86400 vcpu: 1 memory: 126000 + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 202608a88..0a364b8a8 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -113,3 +113,13 @@ WATER_MAP_EQ: timeout: 36000 vcpu: 1 memory: 126000 + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file From e4611e9dbbf96a19f9f0efe81ce34a85872f1777 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 23 Aug 2024 15:26:34 -0500 Subject: [PATCH 07/62] make compute env field with name --- apps/compute-cf.yml.j2 | 7 +++---- job_spec/ARIA_AUTORIFT.yml | 22 ++++++++++++---------- job_spec/ARIA_RAIDER.yml | 22 ++++++++++++---------- job_spec/AUTORIFT.yml | 22 ++++++++++++---------- job_spec/AUTORIFT_ITS_LIVE.yml | 22 ++++++++++++---------- job_spec/INSAR_GAMMA.yml | 22 ++++++++++++---------- job_spec/INSAR_ISCE.yml | 22 ++++++++++++---------- job_spec/INSAR_ISCE_BURST.yml | 22 ++++++++++++---------- job_spec/RTC_GAMMA.yml | 22 ++++++++++++---------- job_spec/S1_CORRECTION_ITS_LIVE.yml | 22 ++++++++++++---------- job_spec/SRG_GSLC.yml | 12 +++++++----- job_spec/WATER_MAP.yml | 22 ++++++++++++---------- job_spec/WATER_MAP_EQ.yml | 22 ++++++++++++---------- 13 files changed, 142 insertions(+), 119 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 0fe1ac281..1955eaf30 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -69,7 +69,7 @@ Resources: # TODO: Render a compute environment for each job spec. {% for job_type, job_spec in job_types.items() %} - {{ job_type }}ComputeEnvironment: + {{ job_spec['compute_environment']['name'] }}: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn @@ -80,10 +80,10 @@ Resources: MinvCpus: 0 # TODO: vCPU count per instance? MaxvCpus: !Ref MaxvCpus InstanceTypes: - {% for instance_type in job_spec['instance_types'] %} + {% for instance_type in job_spec['compute_environment']['instance_types'] %} - {{ instance_type }} {% endfor %} - ImageId: {{ job_spec['ami_id'] }} + ImageId: {{ job_spec['compute_environment']['ami_id'] }} Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile SecurityGroupIds: @@ -95,7 +95,6 @@ Resources: Name: !Ref AWS::StackName {% endfor %} - SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 821fd3bc3..0988c2650 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -61,13 +61,15 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'AriaAutoriftComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index fd4e9ae8a..83fc64514 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -52,13 +52,15 @@ ARIA_RAIDER: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'AriaRaiderComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 7aa25c0ed..64615b986 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -60,13 +60,15 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'AutoriftComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index f20fb911a..fdaf316e0 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -74,13 +74,15 @@ AUTORIFT: - EARTHDATA_PASSWORD - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'AutoriftItsLiveComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 4bc98fc34..22022a391 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -119,13 +119,15 @@ INSAR_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'InsarGammaComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 5a54b2889..024521f51 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -154,13 +154,15 @@ INSAR_ISCE: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'InsarIsceComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index b681ba25f..5cb6be4f0 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -67,13 +67,15 @@ INSAR_ISCE_BURST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'InsarIsceBurstComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index a95afa279..7faba1e5b 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -142,13 +142,15 @@ RTC_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'RtcGammaComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index b4db588a4..290250558 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -52,13 +52,15 @@ S1_CORRECTION_TEST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'S1CorrectionItsLiveComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index ac98d9a89..39aee3a5c 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -47,8 +47,10 @@ SRG_GSLC: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - instance_types: - - g4dn.xlarge - - g4dn.2xlarge - - g6.xlarge - ami_id: instance_with_gpu_support.aws.com \ No newline at end of file + compute_environment: + name: 'SrgGslcComputeEnvironment' + instance_types: + - g4dn.xlarge + - g4dn.2xlarge + - g6.xlarge + ami_id: instance_with_gpu_support.aws.com \ No newline at end of file diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 74264b05d..ec48eca0a 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -203,13 +203,15 @@ WATER_MAP: timeout: 86400 vcpu: 1 memory: 126000 - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'WaterMapComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 0a364b8a8..8d2102672 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -113,13 +113,15 @@ WATER_MAP_EQ: timeout: 36000 vcpu: 1 memory: 126000 - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + compute_environment: + name: 'WaterMapEqComputeEnvironment' + instance_types: + - r6id.xlarge + - r6id.2xlarge + - r6id.4xlarge + - r6id.8xlarge + - r6idn.xlarge + - r6idn.2xlarge + - r6idn.4xlarge + - r6idn.8xlarge + ami_id: abc1234abc1234.aws.com \ No newline at end of file From 18bf683706634c7331cea26325def96f5f52508c Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 23 Aug 2024 15:42:29 -0500 Subject: [PATCH 08/62] create a shared compute environment so jobs dont NEED to specify anything --- apps/compute-cf.yml.j2 | 24 ++++++++++++++++++++++++ job_spec/ARIA_AUTORIFT.yml | 12 +----------- job_spec/ARIA_RAIDER.yml | 12 +----------- job_spec/AUTORIFT.yml | 12 +----------- job_spec/AUTORIFT_ITS_LIVE.yml | 12 +----------- job_spec/INSAR_GAMMA.yml | 12 +----------- job_spec/INSAR_ISCE.yml | 12 +----------- job_spec/INSAR_ISCE_BURST.yml | 12 +----------- job_spec/RTC_GAMMA.yml | 12 +----------- job_spec/S1_CORRECTION_ITS_LIVE.yml | 12 +----------- job_spec/WATER_MAP.yml | 12 +----------- job_spec/WATER_MAP_EQ.yml | 12 +----------- 12 files changed, 35 insertions(+), 121 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 1955eaf30..d8a25f709 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -67,8 +67,31 @@ Resources: --==BOUNDARY==-- + SharedComputeEnvironment: + Type: AWS::Batch::ComputeEnvironment + Properties: + ServiceRole: !GetAtt BatchServiceRole.Arn + Type: MANAGED + ComputeResources: + Type: SPOT + AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED + MinvCpus: 0 + MaxvCpus: !Ref MaxvCpus + InstanceTypes: !Ref InstanceTypes + ImageId: !Ref ImageId + Subnets: !Ref SubnetIds + InstanceRole: !Ref InstanceProfile + SecurityGroupIds: + - !Ref SecurityGroup + LaunchTemplate: + LaunchTemplateId: !Ref LaunchTemplate + Version: !GetAtt LaunchTemplate.LatestVersionNumber + Tags: + Name: !Ref AWS::StackName + # TODO: Render a compute environment for each job spec. {% for job_type, job_spec in job_types.items() %} + {% if 'SharedComputeEnvironment' not in job_spec['compute_environment']['name'] %} {{ job_spec['compute_environment']['name'] }}: Type: AWS::Batch::ComputeEnvironment Properties: @@ -94,6 +117,7 @@ Resources: Tags: Name: !Ref AWS::StackName + {% endif %} {% endfor %} SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 0988c2650..093e75eea 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -62,14 +62,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'AriaAutoriftComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 83fc64514..f7336b591 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -53,14 +53,4 @@ ARIA_RAIDER: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'AriaRaiderComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 64615b986..579b4c7bd 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -61,14 +61,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'AutoriftComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index fdaf316e0..f38a784d8 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -75,14 +75,4 @@ AUTORIFT: - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY compute_environment: - name: 'AutoriftItsLiveComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 22022a391..f8177cbb1 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -120,14 +120,4 @@ INSAR_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'InsarGammaComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 024521f51..1b4e4b9fa 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -155,14 +155,4 @@ INSAR_ISCE: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'InsarIsceComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 5cb6be4f0..ed7ee9d5b 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -68,14 +68,4 @@ INSAR_ISCE_BURST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'InsarIsceBurstComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 7faba1e5b..ed52d2efb 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -143,14 +143,4 @@ RTC_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'RtcGammaComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 290250558..ff3957c49 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -53,14 +53,4 @@ S1_CORRECTION_TEST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'S1CorrectionItsLiveComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index ec48eca0a..9af97ae84 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -204,14 +204,4 @@ WATER_MAP: vcpu: 1 memory: 126000 compute_environment: - name: 'WaterMapComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 8d2102672..3b8fc1880 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -114,14 +114,4 @@ WATER_MAP_EQ: vcpu: 1 memory: 126000 compute_environment: - name: 'WaterMapEqComputeEnvironment' - instance_types: - - r6id.xlarge - - r6id.2xlarge - - r6id.4xlarge - - r6id.8xlarge - - r6idn.xlarge - - r6idn.2xlarge - - r6idn.4xlarge - - r6idn.8xlarge - ami_id: abc1234abc1234.aws.com \ No newline at end of file + name: 'SharedComputeEnvironment' \ No newline at end of file From 46f655440300942e655d90119b56bc16bd6e9cb7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 13:46:40 -0500 Subject: [PATCH 09/62] refactor for single step function --- apps/compute-cf.yml.j2 | 52 ++++++++++++------ .../handle-batch-event-cf.yml.j2 | 8 +++ apps/main-cf.yml.j2 | 22 +++++--- apps/scale-cluster/scale-cluster-cf.yml.j2 | 55 ++++++++++++++++++- apps/step-function.json.j2 | 2 +- apps/workflow-cf.yml.j2 | 32 +++++++++-- 6 files changed, 137 insertions(+), 34 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index d8a25f709..20a0cbe49 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -27,11 +27,22 @@ Parameters: Outputs: - ComputeEnvironmentArn: - Value: !Ref ComputeEnvironment + SharedComputeEnvironmentArn: + Value: !Ref SharedComputeEnvironment - JobQueueArn: - Value: !Ref BatchJobQueue + SharedJobQueueArn: + Value: !Ref SharedBatchJobQueue + + {% for job_type, job_spec in job_types.items() %} + {% if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: + Value: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironment + + {{ job_spec['compute_environment']['name'] }}JobQueueArn: + Value: !Ref {{ job_spec['compute_environment']['name'] }}BatchJobQueue + + {% endif %} + {% endfor %} TaskRoleArn: Value: !GetAtt TaskRole.Arn @@ -89,18 +100,28 @@ Resources: Tags: Name: !Ref AWS::StackName - # TODO: Render a compute environment for each job spec. - {% for job_type, job_spec in job_types.items() %} - {% if 'SharedComputeEnvironment' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}: + SharedBatchJobQueue: + Type: AWS::Batch::JobQueue + Properties: + Priority: 1 + ComputeEnvironmentOrder: + - ComputeEnvironment: !Ref SharedComputeEnvironment + Order: 1 + SchedulingPolicyArn: !Ref SchedulingPolicy + + SchedulingPolicy: + Type: AWS::Batch::SchedulingPolicy + + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn Type: MANAGED ComputeResources: - Type: SPOT # TODO: Type and Allocation Strategy per Instance? + Type: SPOT AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - MinvCpus: 0 # TODO: vCPU count per instance? + MinvCpus: 0 MaxvCpus: !Ref MaxvCpus InstanceTypes: {% for instance_type in job_spec['compute_environment']['instance_types'] %} @@ -117,19 +138,16 @@ Resources: Tags: Name: !Ref AWS::StackName - {% endif %} - {% endfor %} - SchedulingPolicy: - Type: AWS::Batch::SchedulingPolicy - - BatchJobQueue: + {{ job_spec['compute_environment']['name'] }}JobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 ComputeEnvironmentOrder: - - ComputeEnvironment: !Ref ComputeEnvironment + - ComputeEnvironment: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironment Order: 1 SchedulingPolicyArn: !Ref SchedulingPolicy + + {% endfor %} TaskRole: Type: {{ 'Custom::JplRole' if security_environment in ('JPL', 'JPL-public') else 'AWS::IAM::Role' }} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index c115a8f5a..cd2cbc3ce 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -5,6 +5,11 @@ Parameters: JobQueueArn: Type: String + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}JobQueueArn: + Type: String + {% endfor %} + JobsTable: Type: String @@ -95,6 +100,9 @@ Resources: detail: jobQueue: - !Ref JobQueueArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% endfor %} status: - RUNNING Targets: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 10c22c4a1..a3d94ea70 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -51,7 +51,6 @@ Parameters: - false - true - # TODO: Mutliple Instance Type Support AmiId: Type: AWS::SSM::Parameter::Value Default: /aws/service/ecs/optimized-ami/amazon-linux-2023/recommended/image_id @@ -80,7 +79,6 @@ Parameters: MinValue: 0 Default: 0 - # TODO: Mutliple Instance Type Support InstanceTypes: Description: EC2 instance types to include in AWS Batch Compute Environment Type: CommaDelimitedList @@ -138,9 +136,6 @@ Resources: {% endif %} TemplateURL: api/api-cf.yml - # TODO: Mutliple Instance Type Support - # compute-cf.yml needs to support per-job - # instance and ami types. Cluster: Type: AWS::CloudFormation::Stack Properties: @@ -158,7 +153,10 @@ Resources: Condition: ScaleCluster Properties: Parameters: - ComputeEnvironmentArn: !GetAtt Cluster.Outputs.ComputeEnvironmentArn + ComputeEnvironmentArn: !GetAtt Cluster.Outputs.SharedComputeEnvironmentArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + {% endfor %} DefaultMaxvCpus: !Ref DefaultMaxvCpus ExpandedMaxvCpus: !Ref ExpandedMaxvCpus MonthlyBudget: !Ref MonthlyBudget @@ -173,7 +171,10 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn + JobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}JobQueueArn + {% endfor %} JobsTable: !Ref JobsTable {% if security_environment == 'EDC' %} SecurityGroupId: !GetAtt Cluster.Outputs.SecurityGroupId @@ -185,7 +186,10 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn + SharedJobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}JobQueueArn + {% endfor %} TaskRoleArn: !GetAtt Cluster.Outputs.TaskRoleArn JobsTable: !Ref JobsTable Bucket: !Ref ContentBucket @@ -202,7 +206,7 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - StepFunctionArn: !GetAtt StepFunction.Outputs.StepFunctionArn + SharedStepFunctionArn: !GetAtt StepFunction.Outputs.StepFunctionArn TemplateURL: monitoring-cf.yml LogBucket: diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 340d8906c..d455b9401 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -5,6 +5,11 @@ Parameters: ComputeEnvironmentArn: Type: String + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: + Type: String + {% endfor %} + DefaultMaxvCpus: Type: Number MinValue: 0 @@ -79,7 +84,11 @@ Resources: Resource: "*" - Effect: Allow Action: batch:UpdateComputeEnvironment - Resource: !Ref ComputeEnvironmentArn + Resource: + - !Ref ComputeEnvironmentArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + {% endfor %} Lambda: Type: AWS::Lambda::Function @@ -118,6 +127,10 @@ Resources: Targets: - Arn: !GetAtt Lambda.Arn Id: lambda + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + - Arn: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.arn + Id: {{ job_spec['compute_environment']['name'] }}lambda + {% endfor %} EventPermission: Type: AWS::Lambda::Permission @@ -126,3 +139,43 @@ Resources: Action: lambda:InvokeFunction Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn + + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}Lambda: + Type: AWS::Lambda::Function + Properties: + Environment: + Variables: + COMPUTE_ENVIRONMENT_ARN: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + MONTHLY_BUDGET: !Ref MonthlyBudget + DEFAULT_MAX_VCPUS: !Ref DefaultMaxvCpus + EXPANDED_MAX_VCPUS: !Ref ExpandedMaxvCpus + REQUIRED_SURPLUS: !Ref RequiredSurplus + Code: src/ + Handler: scale_cluster.lambda_handler + MemorySize: 128 + Role: !GetAtt Role.Arn + Runtime: python3.9 + Timeout: 30 + {% if security_environment == 'EDC' %} + VpcConfig: + SecurityGroupIds: + - !Ref SecurityGroupId + SubnetIds: !Ref SubnetIds + {% endif %} + + {{ job_spec['compute_environment']['name'] }}EventInvokeConfig: + Type: AWS::Lambda::EventInvokeConfig + Properties: + FunctionName: !Ref {{ job_spec['compute_environment']['name'] }}Lambda + Qualifier: $LATEST + MaximumRetryAttempts: 0 + + {{ job_spec['compute_environment']['name'] }}EventPermission: + Type: AWS::Lambda::Permission + Properties: + FunctionName: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.Arn + Action: lambda:InvokeFunction + Principal: events.amazonaws.com + SourceArn: !GetAtt Schedule.Arn + {% endfor %} \ No newline at end of file diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index 57d0c98aa..bd494858e 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -207,7 +207,7 @@ "Parameters": { "JobDefinition": "{{ '${'+ snake_to_pascal_case(task['name']) + '}' }}", "JobName.$": "$.job_id", - "JobQueue": "${JobQueueArn}", + "JobQueue": "{{ '${'+ job_spec['compute_environment']['name'] +'JobQueueArn}' }}", "ShareIdentifier": "default", "SchedulingPriorityOverride.$": "$.priority", "Parameters.$": "$.job_parameters", diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 642d6014d..2251e88b0 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -2,9 +2,14 @@ AWSTemplateFormatVersion: 2010-09-09 Parameters: - JobQueueArn: + SharedJobQueueArn: Type: String + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} + {{ job_spec['compute_environment']['name'] }}JobQueueArn: + Type: String + {% endfor %} + JobsTable: Type: String @@ -33,8 +38,14 @@ Parameters: Outputs: - StepFunctionArn: - Value: !Ref StepFunction + SharedStepFunctionArn: + Value: !Ref SharedStepFunction + + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} + {{ job_spec['compute_environment']['name'] }}StepFunctionArn: + Value: !Ref {{ job_spec['compute_environment']['name'] }}StepFunction + {% endfor %} + Resources: {% for job_type, job_spec in job_types.items() %} @@ -76,13 +87,16 @@ Resources: {% endfor %} {% endfor %} - StepFunction: + SharedStepFunction: Type: AWS::StepFunctions::StateMachine Properties: RoleArn: !GetAtt StepFunctionRole.Arn DefinitionS3Location: step-function.json DefinitionSubstitutions: - JobQueueArn: !Ref JobQueueArn + SharedJobQueueArn: !Ref SharedJobQueueArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}JobQueueArn: !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% endfor %} {% for job_type, job_spec in job_types.items() %} {% for task in job_spec['tasks'] %} {{ snake_to_pascal_case(task['name']) }}: !Ref {{ snake_to_pascal_case(task['name']) }} @@ -109,6 +123,9 @@ Resources: Effect: Allow ManagedPolicyArns: - !Ref StepFunctionPolicy + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + - !Ref {{ job_spec['compute_environment']['name'] }}StepFunctionPolicy + {% endfor %} StepFunctionPolicy: Type: {{ 'Custom::JplPolicy' if security_environment in ('JPL', 'JPL-public') else 'AWS::IAM::ManagedPolicy' }} @@ -123,7 +140,10 @@ Resources: - Effect: Allow Action: batch:SubmitJob Resource: - - !Ref JobQueueArn + - !Ref SharedJobQueueArn + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} + - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% endfor %} {% for job_type, job_spec in job_types.items() %} {% for task in job_spec['tasks'] %} - !Ref {{ snake_to_pascal_case(task['name']) }} From f6a71171cd2232bcd4cbad874cebb5a76dd37bee Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 13:46:57 -0500 Subject: [PATCH 10/62] SharedComputeEnvironment to Shared --- job_spec/ARIA_AUTORIFT.yml | 2 +- job_spec/ARIA_RAIDER.yml | 2 +- job_spec/AUTORIFT.yml | 2 +- job_spec/AUTORIFT_ITS_LIVE.yml | 2 +- job_spec/INSAR_GAMMA.yml | 2 +- job_spec/INSAR_ISCE.yml | 2 +- job_spec/INSAR_ISCE_BURST.yml | 2 +- job_spec/RTC_GAMMA.yml | 2 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 2 +- job_spec/SRG_GSLC.yml | 2 +- job_spec/WATER_MAP.yml | 2 +- job_spec/WATER_MAP_EQ.yml | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 093e75eea..48807911e 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -62,4 +62,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index f7336b591..6acb05714 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -53,4 +53,4 @@ ARIA_RAIDER: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 579b4c7bd..37c1d11b5 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -61,4 +61,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index f38a784d8..aad9f4d11 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -75,4 +75,4 @@ AUTORIFT: - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index f8177cbb1..4873b81e3 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -120,4 +120,4 @@ INSAR_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 1b4e4b9fa..d4a749b5f 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -155,4 +155,4 @@ INSAR_ISCE: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index ed7ee9d5b..3d205dd05 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -68,4 +68,4 @@ INSAR_ISCE_BURST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index ed52d2efb..7ee0c7055 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -143,4 +143,4 @@ RTC_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index ff3957c49..bd0c70b4b 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -53,4 +53,4 @@ S1_CORRECTION_TEST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 39aee3a5c..fab968f40 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -48,7 +48,7 @@ SRG_GSLC: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SrgGslcComputeEnvironment' + name: 'SrgGslc' instance_types: - g4dn.xlarge - g4dn.2xlarge diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 9af97ae84..c8a7993a6 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -204,4 +204,4 @@ WATER_MAP: vcpu: 1 memory: 126000 compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 3b8fc1880..7c3bdf2a5 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -114,4 +114,4 @@ WATER_MAP_EQ: vcpu: 1 memory: 126000 compute_environment: - name: 'SharedComputeEnvironment' \ No newline at end of file + name: 'Shared' \ No newline at end of file From 0491dd61579f8895a39f34e5bc6a9dea8305e354 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 13:50:25 -0500 Subject: [PATCH 11/62] changelog update --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0555562ec..5c1114994 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [7.8.0] + +### Added +- Support for per-job specification of instance types and AMIs. + + ## [7.7.2] ### Change From 457e40d4b7f30e782ea6816e398f1e6f41156295 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:10:31 -0500 Subject: [PATCH 12/62] removed Batched from names --- apps/compute-cf.yml.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 20a0cbe49..ea7855eaf 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -31,7 +31,7 @@ Outputs: Value: !Ref SharedComputeEnvironment SharedJobQueueArn: - Value: !Ref SharedBatchJobQueue + Value: !Ref SharedJobQueue {% for job_type, job_spec in job_types.items() %} {% if 'Shared' not in job_spec['compute_environment']['name'] %} @@ -39,7 +39,7 @@ Outputs: Value: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironment {{ job_spec['compute_environment']['name'] }}JobQueueArn: - Value: !Ref {{ job_spec['compute_environment']['name'] }}BatchJobQueue + Value: !Ref {{ job_spec['compute_environment']['name'] }}JobQueue {% endif %} {% endfor %} @@ -100,7 +100,7 @@ Resources: Tags: Name: !Ref AWS::StackName - SharedBatchJobQueue: + SharedJobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 From ef0a8e56b154efd723b07feb4ba67d92b24d3c2c Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:11:37 -0500 Subject: [PATCH 13/62] ImageId to AmiId --- apps/compute-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index ea7855eaf..74614e261 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -89,7 +89,7 @@ Resources: MinvCpus: 0 MaxvCpus: !Ref MaxvCpus InstanceTypes: !Ref InstanceTypes - ImageId: !Ref ImageId + ImageId: !Ref AmiId Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile SecurityGroupIds: From cec5376b1cee91889a290fe7da8249c5fe8ceddc Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:15:09 -0500 Subject: [PATCH 14/62] SharedStepFunctionArn to StepFunctionArn --- apps/main-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index a3d94ea70..987514280 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -206,7 +206,7 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - SharedStepFunctionArn: !GetAtt StepFunction.Outputs.StepFunctionArn + StepFunctionArn: !GetAtt StepFunction.Outputs.StepFunctionArn TemplateURL: monitoring-cf.yml LogBucket: From 8024452e216d9fa2c783005ccb83a7a509bce167 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:18:05 -0500 Subject: [PATCH 15/62] added correct ami id --- job_spec/SRG_GSLC.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index fab968f40..0ad7b258b 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -39,11 +39,6 @@ SRG_GSLC: timeout: 10800 vcpu: 1 memory: 31500 - # TODO: GPU Resource Requirment is specified here, - # but the instance type and AMI come from - # compute-cf.yml - # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-batch-jobdefinition-resourcerequirement.html - gpu: 1 secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD @@ -53,4 +48,4 @@ SRG_GSLC: - g4dn.xlarge - g4dn.2xlarge - g6.xlarge - ami_id: instance_with_gpu_support.aws.com \ No newline at end of file + ami_id: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id \ No newline at end of file From df2f0776c6aaf071d539a5228ccfc9c3aaa0b05a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:23:18 -0500 Subject: [PATCH 16/62] correct image id pt 2 --- job_spec/SRG_GSLC.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 0ad7b258b..763de3fbd 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -48,4 +48,5 @@ SRG_GSLC: - g4dn.xlarge - g4dn.2xlarge - g6.xlarge - ami_id: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id \ No newline at end of file + # Image ID for this image: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id + ami_id: ami-0ca2af66da8e56876 \ No newline at end of file From dc22adbff36a6545a59298fa22534ab88cfe9cc3 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:37:02 -0500 Subject: [PATCH 17/62] fixed tabs --- apps/scale-cluster/scale-cluster-cf.yml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index d455b9401..358247c0a 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -164,14 +164,14 @@ Resources: SubnetIds: !Ref SubnetIds {% endif %} - {{ job_spec['compute_environment']['name'] }}EventInvokeConfig: + {{ job_spec['compute_environment']['name'] }}EventInvokeConfig: Type: AWS::Lambda::EventInvokeConfig Properties: FunctionName: !Ref {{ job_spec['compute_environment']['name'] }}Lambda Qualifier: $LATEST MaximumRetryAttempts: 0 - {{ job_spec['compute_environment']['name'] }}EventPermission: + {{ job_spec['compute_environment']['name'] }}EventPermission: Type: AWS::Lambda::Permission Properties: FunctionName: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.Arn From 6b519c51df7c2711673f784db8c7516969b6e85b Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:40:45 -0500 Subject: [PATCH 18/62] SharedStepFunction to StepFunction --- apps/workflow-cf.yml.j2 | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 2251e88b0..be4f5f2f7 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -38,13 +38,8 @@ Parameters: Outputs: - SharedStepFunctionArn: - Value: !Ref SharedStepFunction - - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} - {{ job_spec['compute_environment']['name'] }}StepFunctionArn: - Value: !Ref {{ job_spec['compute_environment']['name'] }}StepFunction - {% endfor %} + StepFunctionArn: + Value: !Ref StepFunction Resources: @@ -87,7 +82,7 @@ Resources: {% endfor %} {% endfor %} - SharedStepFunction: + StepFunction: Type: AWS::StepFunctions::StateMachine Properties: RoleArn: !GetAtt StepFunctionRole.Arn From ee3869fa18edfaf0bf7c37938b20847aac2124ce Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:43:42 -0500 Subject: [PATCH 19/62] arn to Arn --- apps/scale-cluster/scale-cluster-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 358247c0a..35903c244 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -128,7 +128,7 @@ Resources: - Arn: !GetAtt Lambda.Arn Id: lambda {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - - Arn: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.arn + - Arn: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.Arn Id: {{ job_spec['compute_environment']['name'] }}lambda {% endfor %} From 54ec15a4379067f1acbf1e68a9213b0e0c1daf24 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 14:44:50 -0500 Subject: [PATCH 20/62] removed extra stepfunctionpolicies --- apps/workflow-cf.yml.j2 | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index be4f5f2f7..f165f7ab8 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -118,9 +118,6 @@ Resources: Effect: Allow ManagedPolicyArns: - !Ref StepFunctionPolicy - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - - !Ref {{ job_spec['compute_environment']['name'] }}StepFunctionPolicy - {% endfor %} StepFunctionPolicy: Type: {{ 'Custom::JplPolicy' if security_environment in ('JPL', 'JPL-public') else 'AWS::IAM::ManagedPolicy' }} From 8e92db9e0552a2c43d9daab59b9c26a267792942 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:51:02 +0000 Subject: [PATCH 21/62] Bump openapi-core from 0.19.2 to 0.19.3 Bumps [openapi-core](https://github.com/python-openapi/openapi-core) from 0.19.2 to 0.19.3. - [Release notes](https://github.com/python-openapi/openapi-core/releases) - [Commits](https://github.com/python-openapi/openapi-core/compare/0.19.2...0.19.3) --- updated-dependencies: - dependency-name: openapi-core dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-apps-api.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-apps-api.txt b/requirements-apps-api.txt index 3a4d5aec4..7d4b7ff50 100644 --- a/requirements-apps-api.txt +++ b/requirements-apps-api.txt @@ -1,7 +1,7 @@ flask==2.2.5 Flask-Cors==4.0.1 jsonschema==4.23.0 -openapi-core==0.19.2 +openapi-core==0.19.3 prance==23.6.21.0 PyJWT==2.9.0 requests==2.32.3 From c8c2d948cad0c2dcf40b9b6692276719936e10d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:51:06 +0000 Subject: [PATCH 22/62] Bump cfn-lint from 1.9.7 to 1.10.3 Bumps [cfn-lint](https://github.com/aws-cloudformation/cfn-lint) from 1.9.7 to 1.10.3. - [Release notes](https://github.com/aws-cloudformation/cfn-lint/releases) - [Changelog](https://github.com/aws-cloudformation/cfn-lint/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-cloudformation/cfn-lint/compare/v1.9.7...v1.10.3) --- updated-dependencies: - dependency-name: cfn-lint dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 11f6b8327..c29d8b516 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -17,4 +17,4 @@ flake8-blind-except==0.2.1 flake8-builtins==2.5.0 setuptools==72.1.0 openapi-spec-validator==0.7.1 -cfn-lint==1.9.7 +cfn-lint==1.10.3 From ee1603c0626a466627194cbf2659e72d6bd00a5b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:51:15 +0000 Subject: [PATCH 23/62] Bump boto3 from 1.34.159 to 1.35.6 Bumps [boto3](https://github.com/boto/boto3) from 1.34.159 to 1.35.6. - [Release notes](https://github.com/boto/boto3/releases) - [Commits](https://github.com/boto/boto3/compare/1.34.159...1.35.6) --- updated-dependencies: - dependency-name: boto3 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- requirements-apps-disable-private-dns.txt | 2 +- requirements-apps-start-execution-manager.txt | 2 +- requirements-apps-start-execution-worker.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-all.txt b/requirements-all.txt index 11f6b8327..f1e30ceb5 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -5,7 +5,7 @@ -r requirements-apps-start-execution-worker.txt -r requirements-apps-disable-private-dns.txt -r requirements-apps-update-db.txt -boto3==1.34.159 +boto3==1.35.6 jinja2==3.1.4 moto[dynamodb]==5.0.12 pytest==8.3.2 diff --git a/requirements-apps-disable-private-dns.txt b/requirements-apps-disable-private-dns.txt index 16c7ac7aa..0abc8dfb1 100644 --- a/requirements-apps-disable-private-dns.txt +++ b/requirements-apps-disable-private-dns.txt @@ -1 +1 @@ -boto3==1.34.159 +boto3==1.35.6 diff --git a/requirements-apps-start-execution-manager.txt b/requirements-apps-start-execution-manager.txt index 00ad69582..079b6cdeb 100644 --- a/requirements-apps-start-execution-manager.txt +++ b/requirements-apps-start-execution-manager.txt @@ -1,3 +1,3 @@ -boto3==1.34.159 +boto3==1.35.6 ./lib/dynamo/ ./lib/lambda_logging/ diff --git a/requirements-apps-start-execution-worker.txt b/requirements-apps-start-execution-worker.txt index aca0bc7ac..9749fb163 100644 --- a/requirements-apps-start-execution-worker.txt +++ b/requirements-apps-start-execution-worker.txt @@ -1,2 +1,2 @@ -boto3==1.34.159 +boto3==1.35.6 ./lib/lambda_logging/ From 3fac4b2a6c488f964ed993f33e6627f9fd81c051 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:51:19 +0000 Subject: [PATCH 24/62] Bump shapely from 2.0.5 to 2.0.6 Bumps [shapely](https://github.com/shapely/shapely) from 2.0.5 to 2.0.6. - [Release notes](https://github.com/shapely/shapely/releases) - [Changelog](https://github.com/shapely/shapely/blob/main/CHANGES.txt) - [Commits](https://github.com/shapely/shapely/compare/2.0.5...2.0.6) --- updated-dependencies: - dependency-name: shapely dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements-apps-api.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-apps-api.txt b/requirements-apps-api.txt index 3a4d5aec4..68d99cd96 100644 --- a/requirements-apps-api.txt +++ b/requirements-apps-api.txt @@ -6,6 +6,6 @@ prance==23.6.21.0 PyJWT==2.9.0 requests==2.32.3 serverless_wsgi==3.0.4 -shapely==2.0.5 +shapely==2.0.6 strict-rfc3339==0.7 ./lib/dynamo/ From a28ba15340e5280c425e10414622989dfbc6b99b Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 15:08:35 -0500 Subject: [PATCH 25/62] add custom launch template user data commands to job_spec --- apps/compute-cf.yml.j2 | 40 ++++++++++++++++++++++++++++++++-------- job_spec/SRG_GSLC.yml | 22 ++++++++++++++++++++-- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 74614e261..3da49cd25 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -58,7 +58,7 @@ Resources: GroupDescription: !Sub "Security group for ${AWS::StackName} machines" VpcId: !Ref VpcId - LaunchTemplate: + SharedLaunchTemplate: Type: AWS::EC2::LaunchTemplate Properties: LaunchTemplateData: @@ -95,8 +95,8 @@ Resources: SecurityGroupIds: - !Ref SecurityGroup LaunchTemplate: - LaunchTemplateId: !Ref LaunchTemplate - Version: !GetAtt LaunchTemplate.LatestVersionNumber + LaunchTemplateId: !Ref SharedLaunchTemplate + Version: !GetAtt SharedLaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName @@ -113,6 +113,30 @@ Resources: Type: AWS::Batch::SchedulingPolicy {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {{ job_spec['compute_environment']['name'] }}LaunchTemplate: + Type: AWS::EC2::LaunchTemplate + Properties: + LaunchTemplateData: + MetadataOptions: + HttpTokens: required + UserData: + Fn::Base64: |- + Content-Type: multipart/mixed; boundary="==BOUNDARY==" + MIME-Version: 1.0 + + --==BOUNDARY== + Content-Type: text/cloud-boothook; charset="us-ascii" + + #!/bin/bash + cloud-init-per instance mkfs_ssd mkfs.ext4 /dev/nvme1n1 + mount /dev/nvme1n1 /var/lib/docker + + {% for command in job_spec['compute_environment']['user_data_commands'] %} + {{ command }} + {% endfor %} + + --==BOUNDARY==-- + {{ job_spec['compute_environment']['name'] }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: @@ -121,9 +145,9 @@ Resources: ComputeResources: Type: SPOT AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - MinvCpus: 0 - MaxvCpus: !Ref MaxvCpus - InstanceTypes: + MinvCpus: 0 + MaxvCpus: !Ref MaxvCpus + InstanceTypes: {% for instance_type in job_spec['compute_environment']['instance_types'] %} - {{ instance_type }} {% endfor %} @@ -133,8 +157,8 @@ Resources: SecurityGroupIds: - !Ref SecurityGroup LaunchTemplate: - LaunchTemplateId: !Ref LaunchTemplate - Version: !GetAtt LaunchTemplate.LatestVersionNumber + LaunchTemplateId: !Ref {{ job_spec['compute_environment']['name'] }}LaunchTemplate + Version: !GetAtt {{ job_spec['compute_environment']['name'] }}LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 763de3fbd..58e1b943d 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -43,10 +43,28 @@ SRG_GSLC: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SrgGslc' + name: SrgGslc instance_types: - g4dn.xlarge - g4dn.2xlarge - g6.xlarge # Image ID for this image: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id - ami_id: ami-0ca2af66da8e56876 \ No newline at end of file + ami_id: ami-0ca2af66da8e56876 + user_data_commands: + - DRIVER_VERSION=550.54.14 + - dnf install -y kernel-devel-$(uname -r) kernel-headers-$(uname -r) kernel-modules-extra + - curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run + - chmod +x NVIDIA-Linux-x86_64-$DRIVER_VERSION.run + - ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --tmpdir . --silent + - rm ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run + - dnf install -y docker git + - systemctl start docker + - systemctl enable docker + - usermod -aG docker ec2-user + - dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo + - dnf install -y nvidia-container-toolkit + - nvidia-ctk runtime configure --runtime=docker + - systemctl restart docker + - dnf install -y git + - dnf clean all && rm -rf /var/cache/dnf/* + - reboot \ No newline at end of file From 2fe012a4ea5201923cb80d98d17168b00da0f1f6 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 15:11:59 -0500 Subject: [PATCH 26/62] removed todos --- apps/compute-cf.yml.j2 | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 3da49cd25..1bb4dcfb6 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -1,9 +1,5 @@ AWSTemplateFormatVersion: 2010-09-09 -# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-batch-computeenvironment-computeresources.html -# TODO: This is where the instance AMI and Type are specified. To have multiple instance types and amis per deployment, this -# will need to support that. - Parameters: VpcId: @@ -145,8 +141,8 @@ Resources: ComputeResources: Type: SPOT AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - MinvCpus: 0 - MaxvCpus: !Ref MaxvCpus + MinvCpus: 0 + MaxvCpus: !Ref MaxvCpus InstanceTypes: {% for instance_type in job_spec['compute_environment']['instance_types'] %} - {{ instance_type }} From 3b0b0cf6355ce99983e47c95f002d8878f8a570e Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Mon, 26 Aug 2024 15:12:30 -0500 Subject: [PATCH 27/62] removed todos --- .github/workflows/deploy-daac.yml | 2 -- Makefile | 1 - 2 files changed, 3 deletions(-) diff --git a/.github/workflows/deploy-daac.yml b/.github/workflows/deploy-daac.yml index f499b9af3..c6af17100 100644 --- a/.github/workflows/deploy-daac.yml +++ b/.github/workflows/deploy-daac.yml @@ -26,13 +26,11 @@ jobs: cost_profile: EDC deploy_ref: refs/heads/main job_files: job_spec/AUTORIFT.yml job_spec/INSAR_GAMMA.yml job_spec/RTC_GAMMA.yml job_spec/INSAR_ISCE_BURST.yml - # TODO: Instance types will need to be moved to the Job Spec file. instance_types: r6id.xlarge,r6id.2xlarge,r6id.4xlarge,r6id.8xlarge,r6idn.xlarge,r6idn.2xlarge,r6idn.4xlarge,r6idn.8xlarge default_max_vcpus: 1500 expanded_max_vcpus: 3000 required_surplus: 2000 security_environment: EDC - # TODO: AMIs will need to be moved to the Job Spec file. ami_id: image_id_ecs_amz2 distribution_url: 'https://d3gm2hf49xd6jj.cloudfront.net' diff --git a/Makefile b/Makefile index b89da8437..f3bb52acd 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,6 @@ install: python -m pip install -r requirements-all.txt -# TODO: This is where render is called and provided the job type files files ?= job_spec/*.yml security_environment ?= ASF api_name ?= local From 169270dfac5a8cd351f6e9ad017a0b5e6256c56a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 21:37:07 +0000 Subject: [PATCH 28/62] Bump setuptools from 72.1.0 to 73.0.1 Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 73.0.1. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v73.0.1) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index dcf848aa2..dfbcdcf3a 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -15,6 +15,6 @@ flake8==7.1.1 flake8-import-order==0.18.2 flake8-blind-except==0.2.1 flake8-builtins==2.5.0 -setuptools==72.1.0 +setuptools==73.0.1 openapi-spec-validator==0.7.1 cfn-lint==1.10.3 From 68b2d3b91875bb18f2a71f978e811bd62327de46 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:02:30 -0500 Subject: [PATCH 29/62] defaults and variables --- apps/compute-cf.yml.j2 | 33 ++++++++++++++++++--------------- apps/main-cf.yml.j2 | 9 ++++++--- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 1bb4dcfb6..69b5862eb 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -29,15 +29,14 @@ Outputs: SharedJobQueueArn: Value: !Ref SharedJobQueue - {% for job_type, job_spec in job_types.items() %} - {% if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: - Value: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironment + {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% set job_name = job_spec['compute_environment']['name'] %} + {{ job_name }}ComputeEnvironmentArn: + Value: !Ref {{ job_name }}ComputeEnvironment - {{ job_spec['compute_environment']['name'] }}JobQueueArn: - Value: !Ref {{ job_spec['compute_environment']['name'] }}JobQueue + {{ job_name }}JobQueueArn: + Value: !Ref {{ job_name }}JobQueue - {% endif %} {% endfor %} TaskRoleArn: @@ -109,7 +108,11 @@ Resources: Type: AWS::Batch::SchedulingPolicy {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}LaunchTemplate: + {% set env = job_spec['compute_environment'] %} + {% set job_name = env['name'] %} + {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} + {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} + {{ job_name }}LaunchTemplate: Type: AWS::EC2::LaunchTemplate Properties: LaunchTemplateData: @@ -133,7 +136,7 @@ Resources: --==BOUNDARY==-- - {{ job_spec['compute_environment']['name'] }}ComputeEnvironment: + {{ job_name }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn @@ -144,26 +147,26 @@ Resources: MinvCpus: 0 MaxvCpus: !Ref MaxvCpus InstanceTypes: - {% for instance_type in job_spec['compute_environment']['instance_types'] %} + {% for instance_type in instance_types %} - {{ instance_type }} {% endfor %} - ImageId: {{ job_spec['compute_environment']['ami_id'] }} + ImageId: {{ ami_id }} Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile SecurityGroupIds: - !Ref SecurityGroup LaunchTemplate: - LaunchTemplateId: !Ref {{ job_spec['compute_environment']['name'] }}LaunchTemplate - Version: !GetAtt {{ job_spec['compute_environment']['name'] }}LaunchTemplate.LatestVersionNumber + LaunchTemplateId: !Ref {{ job_name }}LaunchTemplate + Version: !GetAtt {{ job_name }}LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName - {{ job_spec['compute_environment']['name'] }}JobQueue: + {{ job_name }}JobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 ComputeEnvironmentOrder: - - ComputeEnvironment: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironment + - ComputeEnvironment: !Ref {{ job_name }}ComputeEnvironment Order: 1 SchedulingPolicyArn: !Ref SchedulingPolicy diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 987514280..aa8c8818e 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -155,7 +155,8 @@ Resources: Parameters: ComputeEnvironmentArn: !GetAtt Cluster.Outputs.SharedComputeEnvironmentArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + {% set job_name = job_spec['compute_environment']['name'] %} + {{ job_name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ job_name }}ComputeEnvironmentArn {% endfor %} DefaultMaxvCpus: !Ref DefaultMaxvCpus ExpandedMaxvCpus: !Ref ExpandedMaxvCpus @@ -173,7 +174,8 @@ Resources: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}JobQueueArn + {% set job_name = job_spec['compute_environment']['name'] %} + {{ job_name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_name }}JobQueueArn {% endfor %} JobsTable: !Ref JobsTable {% if security_environment == 'EDC' %} @@ -188,7 +190,8 @@ Resources: Parameters: SharedJobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_spec['compute_environment']['name'] }}JobQueueArn + {% set job_name = job_spec['compute_environment']['name'] %} + {{ job_name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_name }}JobQueueArn {% endfor %} TaskRoleArn: !GetAtt Cluster.Outputs.TaskRoleArn JobsTable: !Ref JobsTable From 4ce4abede8eb0377954821244d3aa7133a7070a3 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:02:38 -0500 Subject: [PATCH 30/62] shorter comment --- job_spec/SRG_GSLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 58e1b943d..d535344c4 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -48,7 +48,7 @@ SRG_GSLC: - g4dn.xlarge - g4dn.2xlarge - g6.xlarge - # Image ID for this image: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id ami_id: ami-0ca2af66da8e56876 user_data_commands: - DRIVER_VERSION=550.54.14 From 8ff4a7d004fedd299e8c058ed86683858c157492 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:07:41 -0500 Subject: [PATCH 31/62] shorter jinja variable names --- apps/compute-cf.yml.j2 | 24 ++++++++++++------------ apps/main-cf.yml.j2 | 12 ++++++------ apps/workflow-cf.yml.j2 | 3 ++- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 69b5862eb..7951ad31e 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -30,12 +30,12 @@ Outputs: Value: !Ref SharedJobQueue {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {% set job_name = job_spec['compute_environment']['name'] %} - {{ job_name }}ComputeEnvironmentArn: - Value: !Ref {{ job_name }}ComputeEnvironment + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}ComputeEnvironmentArn: + Value: !Ref {{ name }}ComputeEnvironment - {{ job_name }}JobQueueArn: - Value: !Ref {{ job_name }}JobQueue + {{ name }}JobQueueArn: + Value: !Ref {{ name }}JobQueue {% endfor %} @@ -109,10 +109,10 @@ Resources: {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} {% set env = job_spec['compute_environment'] %} - {% set job_name = env['name'] %} + {% set name = env['name'] %} {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} - {{ job_name }}LaunchTemplate: + {{ name }}LaunchTemplate: Type: AWS::EC2::LaunchTemplate Properties: LaunchTemplateData: @@ -136,7 +136,7 @@ Resources: --==BOUNDARY==-- - {{ job_name }}ComputeEnvironment: + {{ name }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn @@ -156,17 +156,17 @@ Resources: SecurityGroupIds: - !Ref SecurityGroup LaunchTemplate: - LaunchTemplateId: !Ref {{ job_name }}LaunchTemplate - Version: !GetAtt {{ job_name }}LaunchTemplate.LatestVersionNumber + LaunchTemplateId: !Ref {{ name }}LaunchTemplate + Version: !GetAtt {{ name }}LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName - {{ job_name }}JobQueue: + {{ name }}JobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 ComputeEnvironmentOrder: - - ComputeEnvironment: !Ref {{ job_name }}ComputeEnvironment + - ComputeEnvironment: !Ref {{ name }}ComputeEnvironment Order: 1 SchedulingPolicyArn: !Ref SchedulingPolicy diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index aa8c8818e..fb54abbea 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -155,8 +155,8 @@ Resources: Parameters: ComputeEnvironmentArn: !GetAtt Cluster.Outputs.SharedComputeEnvironmentArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {% set job_name = job_spec['compute_environment']['name'] %} - {{ job_name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ job_name }}ComputeEnvironmentArn + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ name }}ComputeEnvironmentArn {% endfor %} DefaultMaxvCpus: !Ref DefaultMaxvCpus ExpandedMaxvCpus: !Ref ExpandedMaxvCpus @@ -174,8 +174,8 @@ Resources: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {% set job_name = job_spec['compute_environment']['name'] %} - {{ job_name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_name }}JobQueueArn + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} JobsTable: !Ref JobsTable {% if security_environment == 'EDC' %} @@ -190,8 +190,8 @@ Resources: Parameters: SharedJobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {% set job_name = job_spec['compute_environment']['name'] %} - {{ job_name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ job_name }}JobQueueArn + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} TaskRoleArn: !GetAtt Cluster.Outputs.TaskRoleArn JobsTable: !Ref JobsTable diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index f165f7ab8..17592a1f1 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -90,7 +90,8 @@ Resources: DefinitionSubstitutions: SharedJobQueueArn: !Ref SharedJobQueueArn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}JobQueueArn: !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} {% for task in job_spec['tasks'] %} From 7654d3e9b57134165712847f572e9f7ca897f909 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:09:26 -0500 Subject: [PATCH 32/62] removed added newline --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index f3bb52acd..c97f2c118 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,6 @@ run: render install: python -m pip install -r requirements-all.txt - files ?= job_spec/*.yml security_environment ?= ASF api_name ?= local From b9f8d56a250e4e416e6cdc529bdc86877cf01694 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:10:16 -0500 Subject: [PATCH 33/62] remove todo --- apps/render_cf.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/apps/render_cf.py b/apps/render_cf.py index b1a21ed9f..1907d065a 100644 --- a/apps/render_cf.py +++ b/apps/render_cf.py @@ -67,11 +67,6 @@ def main(): parser.add_argument('-c', '--cost-profile', default='DEFAULT', choices=['DEFAULT', 'EDC']) args = parser.parse_args() - # TODO: Job Types are provided here, by the Makefile. - # The Job Types will need to contain a parsable field - # that contains the information needed for per-instance - # AMIs, types, and resources. - job_types = {} for file in args.job_spec_files: job_types.update(yaml.safe_load(file.read_text())) From 7a385ced098a0dfb2815bf7b5e9f999e0c35cfd9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:12:35 -0500 Subject: [PATCH 34/62] variables --- apps/scale-cluster/scale-cluster-cf.yml.j2 | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index 35903c244..be187b980 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -6,7 +6,8 @@ Parameters: Type: String {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn: + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}ComputeEnvironmentArn: Type: String {% endfor %} @@ -128,8 +129,9 @@ Resources: - Arn: !GetAtt Lambda.Arn Id: lambda {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - - Arn: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.Arn - Id: {{ job_spec['compute_environment']['name'] }}lambda + {% set name = job_spec['compute_environment']['name'] %} + - Arn: !GetAtt {{ name }}Lambda.Arn + Id: {{ name }}lambda {% endfor %} EventPermission: @@ -141,12 +143,13 @@ Resources: SourceArn: !GetAtt Schedule.Arn {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} - {{ job_spec['compute_environment']['name'] }}Lambda: + {% set name = job_spec['compute_environment']['name'] %} + {{ name }}Lambda: Type: AWS::Lambda::Function Properties: Environment: Variables: - COMPUTE_ENVIRONMENT_ARN: !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + COMPUTE_ENVIRONMENT_ARN: !Ref {{name }}ComputeEnvironmentArn MONTHLY_BUDGET: !Ref MonthlyBudget DEFAULT_MAX_VCPUS: !Ref DefaultMaxvCpus EXPANDED_MAX_VCPUS: !Ref ExpandedMaxvCpus @@ -164,17 +167,17 @@ Resources: SubnetIds: !Ref SubnetIds {% endif %} - {{ job_spec['compute_environment']['name'] }}EventInvokeConfig: + {{ name }}EventInvokeConfig: Type: AWS::Lambda::EventInvokeConfig Properties: - FunctionName: !Ref {{ job_spec['compute_environment']['name'] }}Lambda + FunctionName: !Ref {{ name }}Lambda Qualifier: $LATEST MaximumRetryAttempts: 0 - {{ job_spec['compute_environment']['name'] }}EventPermission: + {{ name }}EventPermission: Type: AWS::Lambda::Permission Properties: - FunctionName: !GetAtt {{ job_spec['compute_environment']['name'] }}Lambda.Arn + FunctionName: !GetAtt {{ name }}Lambda.Arn Action: lambda:InvokeFunction Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn From 28a66fb937e8f80cd1ab385939e483eae4d3237f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 12:37:41 -0500 Subject: [PATCH 35/62] gpu tag support --- apps/workflow-cf.yml.j2 | 2 ++ job_spec/SRG_GSLC.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 17592a1f1..db8545010 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -66,6 +66,8 @@ Resources: Value: "{{ task['vcpu'] }}" - Type: MEMORY Value: "{{ task['memory'] }}" + - Type: GPU + Value: "{{ task['gpu'] if 'gpu' in task else 0 }}" Command: {% for command in task['command'] %} - {{ command }} diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index d535344c4..956dd45ca 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -38,6 +38,7 @@ SRG_GSLC: - Ref::granules timeout: 10800 vcpu: 1 + gpu: 1 memory: 31500 secrets: - EARTHDATA_USERNAME From 0f6e778e2a09a69d3360be85331e206831c5063f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 13:45:19 -0500 Subject: [PATCH 36/62] eof newlines --- job_spec/ARIA_AUTORIFT.yml | 2 +- job_spec/ARIA_RAIDER.yml | 2 +- job_spec/AUTORIFT.yml | 2 +- job_spec/AUTORIFT_ITS_LIVE.yml | 2 +- job_spec/INSAR_GAMMA.yml | 2 +- job_spec/INSAR_ISCE.yml | 2 +- job_spec/INSAR_ISCE_BURST.yml | 2 +- job_spec/RTC_GAMMA.yml | 2 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 2 +- job_spec/WATER_MAP.yml | 2 +- job_spec/WATER_MAP_EQ.yml | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 48807911e..c98a4beaf 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -62,4 +62,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 6acb05714..b5808db45 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -53,4 +53,4 @@ ARIA_RAIDER: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 37c1d11b5..3df01e633 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -61,4 +61,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index aad9f4d11..08f7aa40b 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -75,4 +75,4 @@ AUTORIFT: - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 4873b81e3..688a2cf88 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -120,4 +120,4 @@ INSAR_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index d4a749b5f..97ae639f7 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -155,4 +155,4 @@ INSAR_ISCE: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 3d205dd05..97f3527d0 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -68,4 +68,4 @@ INSAR_ISCE_BURST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 7ee0c7055..9f6a11a0e 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -143,4 +143,4 @@ RTC_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'SharedComputeEnvironment' diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index bd0c70b4b..49be6e94c 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -53,4 +53,4 @@ S1_CORRECTION_TEST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index c8a7993a6..c7996bee5 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -204,4 +204,4 @@ WATER_MAP: vcpu: 1 memory: 126000 compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 7c3bdf2a5..7577e614e 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -114,4 +114,4 @@ WATER_MAP_EQ: vcpu: 1 memory: 126000 compute_environment: - name: 'Shared' \ No newline at end of file + name: 'Shared' From e694d61093c072c9ee1cf0a3b1b734e5b18eb53f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 13:45:27 -0500 Subject: [PATCH 37/62] match gpu_support --- job_spec/SRG_GSLC.yml | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 956dd45ca..81f2c39e4 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -39,33 +39,13 @@ SRG_GSLC: timeout: 10800 vcpu: 1 gpu: 1 - memory: 31500 + memory: 30500 secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: name: SrgGslc instance_types: - - g4dn.xlarge - - g4dn.2xlarge - - g6.xlarge + - g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id ami_id: ami-0ca2af66da8e56876 - user_data_commands: - - DRIVER_VERSION=550.54.14 - - dnf install -y kernel-devel-$(uname -r) kernel-headers-$(uname -r) kernel-modules-extra - - curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run - - chmod +x NVIDIA-Linux-x86_64-$DRIVER_VERSION.run - - ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --tmpdir . --silent - - rm ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run - - dnf install -y docker git - - systemctl start docker - - systemctl enable docker - - usermod -aG docker ec2-user - - dnf config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo - - dnf install -y nvidia-container-toolkit - - nvidia-ctk runtime configure --runtime=docker - - systemctl restart docker - - dnf install -y git - - dnf clean all && rm -rf /var/cache/dnf/* - - reboot \ No newline at end of file From cc675fdf43af5b0e0c8f710600c0c57dd570cf92 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 14:04:44 -0500 Subject: [PATCH 38/62] removed shared in names + 'Shared' to 'Default' --- apps/compute-cf.yml.j2 | 90 +++++++------------ .../handle-batch-event-cf.yml.j2 | 4 +- apps/main-cf.yml.j2 | 12 +-- apps/scale-cluster/scale-cluster-cf.yml.j2 | 10 +-- apps/step-function.json.j2 | 4 +- apps/workflow-cf.yml.j2 | 12 +-- job_spec/ARIA_AUTORIFT.yml | 2 +- job_spec/ARIA_RAIDER.yml | 2 +- job_spec/AUTORIFT.yml | 2 +- job_spec/AUTORIFT_ITS_LIVE.yml | 2 +- job_spec/INSAR_GAMMA.yml | 2 +- job_spec/INSAR_ISCE.yml | 2 +- job_spec/INSAR_ISCE_BURST.yml | 2 +- job_spec/RTC_GAMMA.yml | 2 +- job_spec/S1_CORRECTION_ITS_LIVE.yml | 2 +- job_spec/WATER_MAP.yml | 2 +- job_spec/WATER_MAP_EQ.yml | 2 +- 17 files changed, 66 insertions(+), 88 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 7951ad31e..735432c66 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -23,13 +23,13 @@ Parameters: Outputs: - SharedComputeEnvironmentArn: - Value: !Ref SharedComputeEnvironment + ComputeEnvironmentArn: + Value: !Ref ComputeEnvironment - SharedJobQueueArn: - Value: !Ref SharedJobQueue + JobQueueArn: + Value: !Ref JobQueue - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: Value: !Ref {{ name }}ComputeEnvironment @@ -53,7 +53,7 @@ Resources: GroupDescription: !Sub "Security group for ${AWS::StackName} machines" VpcId: !Ref VpcId - SharedLaunchTemplate: + LaunchTemplate: Type: AWS::EC2::LaunchTemplate Properties: LaunchTemplateData: @@ -73,69 +73,45 @@ Resources: --==BOUNDARY==-- - SharedComputeEnvironment: - Type: AWS::Batch::ComputeEnvironment - Properties: - ServiceRole: !GetAtt BatchServiceRole.Arn - Type: MANAGED - ComputeResources: - Type: SPOT - AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED - MinvCpus: 0 - MaxvCpus: !Ref MaxvCpus - InstanceTypes: !Ref InstanceTypes - ImageId: !Ref AmiId - Subnets: !Ref SubnetIds - InstanceRole: !Ref InstanceProfile - SecurityGroupIds: - - !Ref SecurityGroup - LaunchTemplate: - LaunchTemplateId: !Ref SharedLaunchTemplate - Version: !GetAtt SharedLaunchTemplate.LatestVersionNumber - Tags: - Name: !Ref AWS::StackName - - SharedJobQueue: + ComputeEnvironment: + Type: AWS::Batch::ComputeEnvironment + Properties: + ServiceRole: !GetAtt BatchServiceRole.Arn + Type: MANAGED + ComputeResources: + Type: SPOT + AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED + MinvCpus: 0 + MaxvCpus: !Ref MaxvCpus + InstanceTypes: !Ref InstanceTypes + ImageId: !Ref AmiId + Subnets: !Ref SubnetIds + InstanceRole: !Ref InstanceProfile + SecurityGroupIds: + - !Ref SecurityGroup + LaunchTemplate: + LaunchTemplateId: !Ref LaunchTemplate + Version: !GetAtt LaunchTemplate.LatestVersionNumber + Tags: + Name: !Ref AWS::StackName + + JobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 ComputeEnvironmentOrder: - - ComputeEnvironment: !Ref SharedComputeEnvironment + - ComputeEnvironment: !Ref ComputeEnvironment Order: 1 SchedulingPolicyArn: !Ref SchedulingPolicy SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set env = job_spec['compute_environment'] %} {% set name = env['name'] %} {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} - {{ name }}LaunchTemplate: - Type: AWS::EC2::LaunchTemplate - Properties: - LaunchTemplateData: - MetadataOptions: - HttpTokens: required - UserData: - Fn::Base64: |- - Content-Type: multipart/mixed; boundary="==BOUNDARY==" - MIME-Version: 1.0 - - --==BOUNDARY== - Content-Type: text/cloud-boothook; charset="us-ascii" - - #!/bin/bash - cloud-init-per instance mkfs_ssd mkfs.ext4 /dev/nvme1n1 - mount /dev/nvme1n1 /var/lib/docker - - {% for command in job_spec['compute_environment']['user_data_commands'] %} - {{ command }} - {% endfor %} - - --==BOUNDARY==-- - {{ name }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: @@ -156,8 +132,8 @@ Resources: SecurityGroupIds: - !Ref SecurityGroup LaunchTemplate: - LaunchTemplateId: !Ref {{ name }}LaunchTemplate - Version: !GetAtt {{ name }}LaunchTemplate.LatestVersionNumber + LaunchTemplateId: !Ref LaunchTemplate + Version: !GetAtt LaunchTemplate.LatestVersionNumber Tags: Name: !Ref AWS::StackName diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index cd2cbc3ce..f0b5f07b5 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {{ job_spec['compute_environment']['name'] }}JobQueueArn: Type: String {% endfor %} @@ -100,7 +100,7 @@ Resources: detail: jobQueue: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn {% endfor %} status: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index fb54abbea..02ff8937d 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -153,8 +153,8 @@ Resources: Condition: ScaleCluster Properties: Parameters: - ComputeEnvironmentArn: !GetAtt Cluster.Outputs.SharedComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + ComputeEnvironmentArn: !GetAtt Cluster.Outputs.ComputeEnvironmentArn + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ name }}ComputeEnvironmentArn {% endfor %} @@ -172,8 +172,8 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - JobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} @@ -188,8 +188,8 @@ Resources: Type: AWS::CloudFormation::Stack Properties: Parameters: - SharedJobQueueArn: !GetAtt Cluster.Outputs.SharedJobQueueArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index be187b980..b5bbabc62 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: ComputeEnvironmentArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: Type: String @@ -87,7 +87,7 @@ Resources: Action: batch:UpdateComputeEnvironment Resource: - !Ref ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn {% endfor %} @@ -128,7 +128,7 @@ Resources: Targets: - Arn: !GetAtt Lambda.Arn Id: lambda - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} - Arn: !GetAtt {{ name }}Lambda.Arn Id: {{ name }}lambda @@ -142,7 +142,7 @@ Resources: Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}Lambda: Type: AWS::Lambda::Function @@ -181,4 +181,4 @@ Resources: Action: lambda:InvokeFunction Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn - {% endfor %} \ No newline at end of file + {% endfor %} diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index bd494858e..b0598d480 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -207,7 +207,9 @@ "Parameters": { "JobDefinition": "{{ '${'+ snake_to_pascal_case(task['name']) + '}' }}", "JobName.$": "$.job_id", - "JobQueue": "{{ '${'+ job_spec['compute_environment']['name'] +'JobQueueArn}' }}", + {% set name = job_spec['compute_environment']['name'] %} + {% set job_queue = name + 'JobQueueArn' if 'Default' not in name else 'JobQueueArn' %} + "JobQueue": "{{ '${'+ job_queue +'}' }}", "ShareIdentifier": "default", "SchedulingPriorityOverride.$": "$.priority", "Parameters.$": "$.job_parameters", diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index db8545010..694ec376f 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -2,10 +2,10 @@ AWSTemplateFormatVersion: 2010-09-09 Parameters: - SharedJobQueueArn: + JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name']%} {{ job_spec['compute_environment']['name'] }}JobQueueArn: Type: String {% endfor %} @@ -90,8 +90,8 @@ Resources: RoleArn: !GetAtt StepFunctionRole.Arn DefinitionS3Location: step-function.json DefinitionSubstitutions: - SharedJobQueueArn: !Ref SharedJobQueueArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name'] %} + JobQueueArn: !Ref JobQueueArn + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} @@ -135,8 +135,8 @@ Resources: - Effect: Allow Action: batch:SubmitJob Resource: - - !Ref SharedJobQueueArn - {% for job_type, job_spec in job_types.items() if 'Shared' not in job_spec['compute_environment']['name']%} + - !Ref JobQueueArn + {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name']%} - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index c98a4beaf..0b78c383b 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -62,4 +62,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index b5808db45..867ffa520 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -53,4 +53,4 @@ ARIA_RAIDER: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index 3df01e633..b007dd71f 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -61,4 +61,4 @@ AUTORIFT: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index 08f7aa40b..46b9f70ef 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -75,4 +75,4 @@ AUTORIFT: - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 688a2cf88..5768764c7 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -120,4 +120,4 @@ INSAR_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 97ae639f7..f510dd0d6 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -155,4 +155,4 @@ INSAR_ISCE: - RAIDER_HRES_API_KEY - RAIDER_HRES_URL compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index 97f3527d0..cd8b931af 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -68,4 +68,4 @@ INSAR_ISCE_BURST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 9f6a11a0e..e0a431d1b 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -143,4 +143,4 @@ RTC_GAMMA: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'SharedComputeEnvironment' + name: 'Default' diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index 49be6e94c..b0ef1552f 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -53,4 +53,4 @@ S1_CORRECTION_TEST: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index c7996bee5..73f60a463 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -204,4 +204,4 @@ WATER_MAP: vcpu: 1 memory: 126000 compute_environment: - name: 'Shared' + name: 'Default' diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 7577e614e..d419a378c 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -114,4 +114,4 @@ WATER_MAP_EQ: vcpu: 1 memory: 126000 compute_environment: - name: 'Shared' + name: 'Default' From fb1acad59c2e5fa27fab3e32db21ee5ff9aa51c5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 27 Aug 2024 14:10:22 -0500 Subject: [PATCH 39/62] not in to != --- apps/compute-cf.yml.j2 | 4 ++-- apps/handle-batch-event/handle-batch-event-cf.yml.j2 | 4 ++-- apps/main-cf.yml.j2 | 6 +++--- apps/scale-cluster/scale-cluster-cf.yml.j2 | 8 ++++---- apps/step-function.json.j2 | 2 +- apps/workflow-cf.yml.j2 | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 735432c66..e6520beb7 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -29,7 +29,7 @@ Outputs: JobQueueArn: Value: !Ref JobQueue - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: Value: !Ref {{ name }}ComputeEnvironment @@ -107,7 +107,7 @@ Resources: SchedulingPolicy: Type: AWS::Batch::SchedulingPolicy - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set env = job_spec['compute_environment'] %} {% set name = env['name'] %} {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} diff --git a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 index f0b5f07b5..386ae87c6 100644 --- a/apps/handle-batch-event/handle-batch-event-cf.yml.j2 +++ b/apps/handle-batch-event/handle-batch-event-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {{ job_spec['compute_environment']['name'] }}JobQueueArn: Type: String {% endfor %} @@ -100,7 +100,7 @@ Resources: detail: jobQueue: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn {% endfor %} status: diff --git a/apps/main-cf.yml.j2 b/apps/main-cf.yml.j2 index 02ff8937d..13b7405d5 100644 --- a/apps/main-cf.yml.j2 +++ b/apps/main-cf.yml.j2 @@ -154,7 +154,7 @@ Resources: Properties: Parameters: ComputeEnvironmentArn: !GetAtt Cluster.Outputs.ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: !GetAtt Cluster.Outputs.{{ name }}ComputeEnvironmentArn {% endfor %} @@ -173,7 +173,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} @@ -189,7 +189,7 @@ Resources: Properties: Parameters: JobQueueArn: !GetAtt Cluster.Outputs.JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !GetAtt Cluster.Outputs.{{ name }}JobQueueArn {% endfor %} diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index b5bbabc62..fed7dee16 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: ComputeEnvironmentArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}ComputeEnvironmentArn: Type: String @@ -87,7 +87,7 @@ Resources: Action: batch:UpdateComputeEnvironment Resource: - !Ref ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn {% endfor %} @@ -128,7 +128,7 @@ Resources: Targets: - Arn: !GetAtt Lambda.Arn Id: lambda - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} - Arn: !GetAtt {{ name }}Lambda.Arn Id: {{ name }}lambda @@ -142,7 +142,7 @@ Resources: Principal: events.amazonaws.com SourceArn: !GetAtt Schedule.Arn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}Lambda: Type: AWS::Lambda::Function diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index b0598d480..78348528c 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -208,7 +208,7 @@ "JobDefinition": "{{ '${'+ snake_to_pascal_case(task['name']) + '}' }}", "JobName.$": "$.job_id", {% set name = job_spec['compute_environment']['name'] %} - {% set job_queue = name + 'JobQueueArn' if 'Default' not in name else 'JobQueueArn' %} + {% set job_queue = name + 'JobQueueArn' if 'Default' != name else 'JobQueueArn' %} "JobQueue": "{{ '${'+ job_queue +'}' }}", "ShareIdentifier": "default", "SchedulingPriorityOverride.$": "$.priority", diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index 694ec376f..cecde27c3 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name']%} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name']%} {{ job_spec['compute_environment']['name'] }}JobQueueArn: Type: String {% endfor %} @@ -91,7 +91,7 @@ Resources: DefinitionS3Location: step-function.json DefinitionSubstitutions: JobQueueArn: !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name'] %} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} {{ name }}JobQueueArn: !Ref {{ name }}JobQueueArn {% endfor %} @@ -136,7 +136,7 @@ Resources: Action: batch:SubmitJob Resource: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' not in job_spec['compute_environment']['name']%} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name']%} - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} From 7df649b4928df2ee2441bcad64b4e222ef2d9ce3 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:07:57 -0500 Subject: [PATCH 40/62] move compute env before tasks --- job_spec/ARIA_AUTORIFT.yml | 4 ++-- job_spec/ARIA_RAIDER.yml | 5 +++-- job_spec/AUTORIFT.yml | 4 ++-- job_spec/AUTORIFT_ITS_LIVE.yml | 4 ++-- job_spec/INSAR_GAMMA.yml | 2 ++ job_spec/INSAR_ISCE.yml | 5 +++-- job_spec/INSAR_ISCE_BURST.yml | 2 ++ job_spec/RTC_GAMMA.yml | 2 ++ job_spec/S1_CORRECTION_ITS_LIVE.yml | 4 ++-- job_spec/SRG_GSLC.yml | 13 +++++++------ job_spec/WATER_MAP.yml | 5 +++-- job_spec/WATER_MAP_EQ.yml | 5 +++-- 12 files changed, 33 insertions(+), 22 deletions(-) diff --git a/job_spec/ARIA_AUTORIFT.yml b/job_spec/ARIA_AUTORIFT.yml index 0b78c383b..aaea5beb5 100644 --- a/job_spec/ARIA_AUTORIFT.yml +++ b/job_spec/ARIA_AUTORIFT.yml @@ -42,6 +42,8 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -61,5 +63,3 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index 867ffa520..eaec9afb9 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -25,6 +25,8 @@ ARIA_RAIDER: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/dbekaert/raider @@ -52,5 +54,4 @@ ARIA_RAIDER: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - compute_environment: - name: 'Default' + diff --git a/job_spec/AUTORIFT.yml b/job_spec/AUTORIFT.yml index b007dd71f..4f12faf6f 100644 --- a/job_spec/AUTORIFT.yml +++ b/job_spec/AUTORIFT.yml @@ -39,6 +39,8 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -60,5 +62,3 @@ AUTORIFT: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' diff --git a/job_spec/AUTORIFT_ITS_LIVE.yml b/job_spec/AUTORIFT_ITS_LIVE.yml index 46b9f70ef..3b72af171 100644 --- a/job_spec/AUTORIFT_ITS_LIVE.yml +++ b/job_spec/AUTORIFT_ITS_LIVE.yml @@ -51,6 +51,8 @@ AUTORIFT: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -74,5 +76,3 @@ AUTORIFT: - EARTHDATA_PASSWORD - PUBLISH_ACCESS_KEY_ID - PUBLISH_SECRET_ACCESS_KEY - compute_environment: - name: 'Default' diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index 5768764c7..b37df6b48 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -82,6 +82,8 @@ INSAR_GAMMA: cost: 1.0 validators: - check_dem_coverage + compute_environment: + name: 'Default' tasks: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index f510dd0d6..1fe79b098 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -92,6 +92,8 @@ INSAR_ISCE: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/access-cloud-based-insar/dockerizedtopsapp @@ -154,5 +156,4 @@ INSAR_ISCE: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - compute_environment: - name: 'Default' + diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index cd8b931af..ffb7960e7 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -44,6 +44,8 @@ INSAR_ISCE_BURST: - check_valid_polarizations - check_same_burst_ids - check_not_antimeridian + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-isce2 diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index e0a431d1b..42dda7b60 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -103,6 +103,8 @@ RTC_GAMMA: cost: 1.0 validators: - check_dem_coverage + compute_environment: + name: 'Default' tasks: - name: '' image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma diff --git a/job_spec/S1_CORRECTION_ITS_LIVE.yml b/job_spec/S1_CORRECTION_ITS_LIVE.yml index b0ef1552f..8de729ce6 100644 --- a/job_spec/S1_CORRECTION_ITS_LIVE.yml +++ b/job_spec/S1_CORRECTION_ITS_LIVE.yml @@ -29,6 +29,8 @@ S1_CORRECTION_TEST: DEFAULT: cost: 1.0 validators: [] + compute_environment: + name: 'Default' tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-autorift @@ -52,5 +54,3 @@ S1_CORRECTION_TEST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 81f2c39e4..e9718249d 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -24,6 +24,12 @@ SRG_GSLC: cost_profiles: DEFAULT: cost: 1.0 + compute_environment: + name: SrgGslc + instance_types: + - g6.2xlarge + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id + ami_id: ami-0ca2af66da8e56876 tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-srg @@ -43,9 +49,4 @@ SRG_GSLC: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: SrgGslc - instance_types: - - g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id - ami_id: ami-0ca2af66da8e56876 + diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 73f60a463..7ef8f4631 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -116,6 +116,8 @@ WATER_MAP: cost: 1.0 validators: - check_dem_coverage + compute_environment: + name: 'Default' tasks: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -203,5 +205,4 @@ WATER_MAP: timeout: 86400 vcpu: 1 memory: 126000 - compute_environment: - name: 'Default' + diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index d419a378c..6ffb47f0c 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -58,6 +58,8 @@ WATER_MAP_EQ: cost: 1.0 validators: - check_dem_coverage + compute_environment: + name: 'Default' tasks: - name: RTC image: 845172464411.dkr.ecr.us-west-2.amazonaws.com/hyp3-gamma @@ -113,5 +115,4 @@ WATER_MAP_EQ: timeout: 36000 vcpu: 1 memory: 126000 - compute_environment: - name: 'Default' + From 6d5fdcad7f6d403f2e961675b8333aa924d565a5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:08:04 -0500 Subject: [PATCH 41/62] refactoring --- apps/compute-cf.yml.j2 | 4 ++-- apps/scale-cluster/scale-cluster-cf.yml.j2 | 8 +++++++- apps/workflow-cf.yml.j2 | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index e6520beb7..5de0d5407 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -27,7 +27,7 @@ Outputs: Value: !Ref ComputeEnvironment JobQueueArn: - Value: !Ref JobQueue + Value: !Ref BatchJobQueue {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} @@ -95,7 +95,7 @@ Resources: Tags: Name: !Ref AWS::StackName - JobQueue: + BatchJobQueue: Type: AWS::Batch::JobQueue Properties: Priority: 1 diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index fed7dee16..c69c4db3f 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -144,12 +144,18 @@ Resources: {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set name = job_spec['compute_environment']['name'] %} + {{ name }}LogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: !Sub {{ "/aws/lambda/${"+ name +"LogGroup}" }} + RetentionInDays: 90 + {{ name }}Lambda: Type: AWS::Lambda::Function Properties: Environment: Variables: - COMPUTE_ENVIRONMENT_ARN: !Ref {{name }}ComputeEnvironmentArn + COMPUTE_ENVIRONMENT_ARN: !Ref {{ name }}ComputeEnvironmentArn MONTHLY_BUDGET: !Ref MonthlyBudget DEFAULT_MAX_VCPUS: !Ref DefaultMaxvCpus EXPANDED_MAX_VCPUS: !Ref ExpandedMaxvCpus diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index cecde27c3..df17a0b0a 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -5,7 +5,7 @@ Parameters: JobQueueArn: Type: String - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name']%} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {{ job_spec['compute_environment']['name'] }}JobQueueArn: Type: String {% endfor %} @@ -66,8 +66,10 @@ Resources: Value: "{{ task['vcpu'] }}" - Type: MEMORY Value: "{{ task['memory'] }}" + {% if 'gpu' in task %} - Type: GPU - Value: "{{ task['gpu'] if 'gpu' in task else 0 }}" + Value: "{{ task['gpu'] }}" + {% endif %} Command: {% for command in task['command'] %} - {{ command }} From aafeba1a484a1833f15eb02b37f769788e6b64ff Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:13:28 -0500 Subject: [PATCH 42/62] Log Group correction --- apps/scale-cluster/scale-cluster-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index c69c4db3f..d46937b14 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -147,7 +147,7 @@ Resources: {{ name }}LogGroup: Type: AWS::Logs::LogGroup Properties: - LogGroupName: !Sub {{ "/aws/lambda/${"+ name +"LogGroup}" }} + LogGroupName: !Sub {{ "/aws/lambda/${"+ name +"Lambda}" }} RetentionInDays: 90 {{ name }}Lambda: From f9afc1af5f932757dd11a4f0954c66c8bfb66cda Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:38:38 -0500 Subject: [PATCH 43/62] removed spaces --- job_spec/ARIA_RAIDER.yml | 1 - job_spec/INSAR_ISCE.yml | 1 - job_spec/SRG_GSLC.yml | 1 - job_spec/WATER_MAP.yml | 1 - job_spec/WATER_MAP_EQ.yml | 1 - 5 files changed, 5 deletions(-) diff --git a/job_spec/ARIA_RAIDER.yml b/job_spec/ARIA_RAIDER.yml index eaec9afb9..840a67154 100644 --- a/job_spec/ARIA_RAIDER.yml +++ b/job_spec/ARIA_RAIDER.yml @@ -54,4 +54,3 @@ ARIA_RAIDER: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index 1fe79b098..ff1b58166 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -156,4 +156,3 @@ INSAR_ISCE: - RAIDER_HRES_EMAIL - RAIDER_HRES_API_KEY - RAIDER_HRES_URL - diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index e9718249d..9d4fe9e87 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -49,4 +49,3 @@ SRG_GSLC: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - diff --git a/job_spec/WATER_MAP.yml b/job_spec/WATER_MAP.yml index 7ef8f4631..89105c97a 100644 --- a/job_spec/WATER_MAP.yml +++ b/job_spec/WATER_MAP.yml @@ -205,4 +205,3 @@ WATER_MAP: timeout: 86400 vcpu: 1 memory: 126000 - diff --git a/job_spec/WATER_MAP_EQ.yml b/job_spec/WATER_MAP_EQ.yml index 6ffb47f0c..d4a16d23f 100644 --- a/job_spec/WATER_MAP_EQ.yml +++ b/job_spec/WATER_MAP_EQ.yml @@ -115,4 +115,3 @@ WATER_MAP_EQ: timeout: 36000 vcpu: 1 memory: 126000 - From 8c39fd0c5800f8217b811d07a191e26b4aa8d7d8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:52:17 -0500 Subject: [PATCH 44/62] removed dupe --- job_spec/INSAR_GAMMA.yml | 2 -- job_spec/INSAR_ISCE_BURST.yml | 2 -- job_spec/RTC_GAMMA.yml | 2 -- 3 files changed, 6 deletions(-) diff --git a/job_spec/INSAR_GAMMA.yml b/job_spec/INSAR_GAMMA.yml index b37df6b48..2d376bf8d 100644 --- a/job_spec/INSAR_GAMMA.yml +++ b/job_spec/INSAR_GAMMA.yml @@ -121,5 +121,3 @@ INSAR_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' diff --git a/job_spec/INSAR_ISCE_BURST.yml b/job_spec/INSAR_ISCE_BURST.yml index ffb7960e7..d741b5aa4 100644 --- a/job_spec/INSAR_ISCE_BURST.yml +++ b/job_spec/INSAR_ISCE_BURST.yml @@ -69,5 +69,3 @@ INSAR_ISCE_BURST: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' diff --git a/job_spec/RTC_GAMMA.yml b/job_spec/RTC_GAMMA.yml index 42dda7b60..393814074 100644 --- a/job_spec/RTC_GAMMA.yml +++ b/job_spec/RTC_GAMMA.yml @@ -144,5 +144,3 @@ RTC_GAMMA: secrets: - EARTHDATA_USERNAME - EARTHDATA_PASSWORD - compute_environment: - name: 'Default' From a446d03151b212f69f254793253e583a5835463a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 28 Aug 2024 12:53:27 -0500 Subject: [PATCH 45/62] ability to specify allocation type/strat --- apps/compute-cf.yml.j2 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 5de0d5407..f5d4c2ed5 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -112,14 +112,16 @@ Resources: {% set name = env['name'] %} {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} + {% set type = env['allocation_type'] if 'allocation_type' in env else 'SPOT' %} + {% set strategy = env['allocation_strategy'] if 'allocation_strategy' in env else 'SPOT_PRICE_CAPACITY_OPTIMIZED' %} {{ name }}ComputeEnvironment: Type: AWS::Batch::ComputeEnvironment Properties: ServiceRole: !GetAtt BatchServiceRole.Arn Type: MANAGED ComputeResources: - Type: SPOT - AllocationStrategy: SPOT_PRICE_CAPACITY_OPTIMIZED + Type: {{ type }} + AllocationStrategy: {{ strategy }} MinvCpus: 0 MaxvCpus: !Ref MaxvCpus InstanceTypes: From ddb2797385db8cefd2fb1d45c8dc0c79224fc979 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 28 Aug 2024 14:24:15 -0800 Subject: [PATCH 46/62] Add missing whitespace --- apps/scale-cluster/scale-cluster-cf.yml.j2 | 10 +++++----- apps/step-function.json.j2 | 2 +- apps/workflow-cf.yml.j2 | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index d46937b14..b90b3608c 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -86,10 +86,10 @@ Resources: - Effect: Allow Action: batch:UpdateComputeEnvironment Resource: - - !Ref ComputeEnvironmentArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn - {% endfor %} + - !Ref ComputeEnvironmentArn + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} + - !Ref {{ job_spec['compute_environment']['name'] }}ComputeEnvironmentArn + {% endfor %} Lambda: Type: AWS::Lambda::Function @@ -147,7 +147,7 @@ Resources: {{ name }}LogGroup: Type: AWS::Logs::LogGroup Properties: - LogGroupName: !Sub {{ "/aws/lambda/${"+ name +"Lambda}" }} + LogGroupName: !Sub {{ "/aws/lambda/${" + name + "Lambda}" }} RetentionInDays: 90 {{ name }}Lambda: diff --git a/apps/step-function.json.j2 b/apps/step-function.json.j2 index 78348528c..68506f211 100644 --- a/apps/step-function.json.j2 +++ b/apps/step-function.json.j2 @@ -209,7 +209,7 @@ "JobName.$": "$.job_id", {% set name = job_spec['compute_environment']['name'] %} {% set job_queue = name + 'JobQueueArn' if 'Default' != name else 'JobQueueArn' %} - "JobQueue": "{{ '${'+ job_queue +'}' }}", + "JobQueue": "{{ '${' + job_queue + '}' }}", "ShareIdentifier": "default", "SchedulingPriorityOverride.$": "$.priority", "Parameters.$": "$.job_parameters", diff --git a/apps/workflow-cf.yml.j2 b/apps/workflow-cf.yml.j2 index df17a0b0a..ec740395b 100644 --- a/apps/workflow-cf.yml.j2 +++ b/apps/workflow-cf.yml.j2 @@ -138,7 +138,7 @@ Resources: Action: batch:SubmitJob Resource: - !Ref JobQueueArn - {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name']%} + {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} - !Ref {{ job_spec['compute_environment']['name'] }}JobQueueArn {% endfor %} {% for job_type, job_spec in job_types.items() %} From cb8fbb897c2a75440c41959dbd8881ab37a44b4a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 28 Aug 2024 14:32:01 -0800 Subject: [PATCH 47/62] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c1114994..b774b1b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [7.8.0] ### Added -- Support for per-job specification of instance types and AMIs. +- Allow overriding certain AWS Batch compute environment parameters (including instance types and AMI) within a job spec. +- Allow job spec tasks to require GPU resource requirements. ## [7.7.2] From a141a47290fa23ed575ae7c65e53d59021bd1c3e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 28 Aug 2024 14:34:16 -0800 Subject: [PATCH 48/62] Update CHANGELOG.md --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b774b1b75..1865dc0eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Allow overriding certain AWS Batch compute environment parameters (including instance types and AMI) within a job spec. -- Allow job spec tasks to require GPU resource requirements. +- Allow job spec tasks to require GPU resources. + +### Changed +- The `SRG_GSLC` job type now runs within a GPU environment. ## [7.7.2] From c943c0caae00b36eba615c8b1b55c242541c135d Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 28 Aug 2024 15:12:38 -0800 Subject: [PATCH 49/62] minor fixes based on local rendering --- apps/compute-cf.yml.j2 | 1 - apps/scale-cluster/scale-cluster-cf.yml.j2 | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index f5d4c2ed5..4fcd618cc 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -36,7 +36,6 @@ Outputs: {{ name }}JobQueueArn: Value: !Ref {{ name }}JobQueue - {% endfor %} TaskRoleArn: diff --git a/apps/scale-cluster/scale-cluster-cf.yml.j2 b/apps/scale-cluster/scale-cluster-cf.yml.j2 index b90b3608c..358f095b3 100644 --- a/apps/scale-cluster/scale-cluster-cf.yml.j2 +++ b/apps/scale-cluster/scale-cluster-cf.yml.j2 @@ -147,7 +147,7 @@ Resources: {{ name }}LogGroup: Type: AWS::Logs::LogGroup Properties: - LogGroupName: !Sub {{ "/aws/lambda/${" + name + "Lambda}" }} + LogGroupName: !Sub "{{ '/aws/lambda/${' + name + 'Lambda}' }}" RetentionInDays: 90 {{ name }}Lambda: From 91e36e199fc3a04f1b959cdce04ed12784a18835 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 29 Aug 2024 14:19:47 -0500 Subject: [PATCH 50/62] correct ami and image tag --- job_spec/SRG_GSLC.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index 9d4fe9e87..fe09bd559 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -28,11 +28,12 @@ SRG_GSLC: name: SrgGslc instance_types: - g6.2xlarge - # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id - ami_id: ami-0ca2af66da8e56876 + # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id + ami_id: ami-0729c079aae647cb3 tasks: - name: '' image: ghcr.io/asfhyp3/hyp3-srg + image_tag: 0.7.0.gpu command: - ++process - back_projection From 5fc758477726d6c7f4c68438687892d12d1b33a1 Mon Sep 17 00:00:00 2001 From: Charlie Marshak Date: Tue, 27 Aug 2024 12:16:20 -0700 Subject: [PATCH 51/62] Update INSAR_ISCE.yml --- job_spec/INSAR_ISCE.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index ff1b58166..b5bafb6fc 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -99,7 +99,7 @@ INSAR_ISCE: image: ghcr.io/access-cloud-based-insar/dockerizedtopsapp command: - ++omp-num-threads - - '2' # 2 for the m instance family; 4 for the c + - '4' # 2 for the m instance family; 4 for the c - --bucket - '!Ref Bucket' - --bucket-prefix From c9ef6ab10f194c66699c58bb964ab97e9e464308 Mon Sep 17 00:00:00 2001 From: Charlie Marshak Date: Tue, 27 Aug 2024 12:14:39 -0700 Subject: [PATCH 52/62] Update deploy-enterprise.yml --- .github/workflows/deploy-enterprise.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-enterprise.yml b/.github/workflows/deploy-enterprise.yml index 74fd1d6fe..fee664ef1 100644 --- a/.github/workflows/deploy-enterprise.yml +++ b/.github/workflows/deploy-enterprise.yml @@ -45,7 +45,7 @@ jobs: job_spec/ARIA_AUTORIFT.yml job_spec/ARIA_RAIDER.yml job_spec/INSAR_ISCE.yml - instance_types: m6id.xlarge,m6id.2xlarge,m6id.4xlarge,m6id.8xlarge,m6idn.xlarge,m6idn.2xlarge,m6idn.4xlarge,m6idn.8xlarge + instance_types: c6id.xlarge,c6id.2xlarge,c6id.4xlarge,c6id.8xlarge default_max_vcpus: 4000 # Max: 13000 expanded_max_vcpus: 4000 # Max: 13000 required_surplus: 0 @@ -65,7 +65,7 @@ jobs: job_spec/ARIA_AUTORIFT.yml job_spec/ARIA_RAIDER.yml job_spec/INSAR_ISCE.yml - instance_types: m6id.xlarge,m6id.2xlarge,m6id.4xlarge,m6id.8xlarge,m6idn.xlarge,m6idn.2xlarge,m6idn.4xlarge,m6idn.8xlarge + instance_types: c6id.xlarge,c6id.2xlarge,c6id.4xlarge,c6id.8xlarge default_max_vcpus: 1000 # Max: 10316 expanded_max_vcpus: 1000 # Max: 10316 required_surplus: 0 @@ -85,7 +85,7 @@ jobs: job_spec/ARIA_AUTORIFT.yml job_spec/ARIA_RAIDER.yml job_spec/INSAR_ISCE.yml - instance_types: m6id.xlarge,m6id.2xlarge,m6id.4xlarge,m6id.8xlarge,m6idn.xlarge,m6idn.2xlarge,m6idn.4xlarge,m6idn.8xlarge + instance_types: c6id.xlarge,c6id.2xlarge,c6id.4xlarge,c6id.8xlarge default_max_vcpus: 1600 # Max 1652 expanded_max_vcpus: 1600 # Max 1652 required_surplus: 0 From b50493516061b0f3ac5f331148afa35979c8b9fd Mon Sep 17 00:00:00 2001 From: Charlie Marshak Date: Tue, 27 Aug 2024 12:20:13 -0700 Subject: [PATCH 53/62] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1865dc0eb..8e848e2da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [7.7.3] + +### Change +- Revert ARIA hyp3 deployments back to C-instance family - including the job-spec CLI parmeter `omp-num-threads` to ensure multiple jobs fitting on single instance. ## [7.8.0] From 606cdd8d0a5fea29ca0cb7c3e0ce0402982dc1aa Mon Sep 17 00:00:00 2001 From: Charlie Marshak Date: Tue, 27 Aug 2024 12:22:45 -0700 Subject: [PATCH 54/62] Update deploy-enterprise-test.yml --- .github/workflows/deploy-enterprise-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-enterprise-test.yml b/.github/workflows/deploy-enterprise-test.yml index a5b3895f4..dd6f32263 100644 --- a/.github/workflows/deploy-enterprise-test.yml +++ b/.github/workflows/deploy-enterprise-test.yml @@ -50,7 +50,7 @@ jobs: job_spec/ARIA_AUTORIFT.yml job_spec/ARIA_RAIDER.yml job_spec/INSAR_ISCE.yml - instance_types: m6id.xlarge,m6id.2xlarge,m6id.4xlarge,m6id.8xlarge,m6idn.xlarge,m6idn.2xlarge,m6idn.4xlarge,m6idn.8xlarge + instance_types: c6id.xlarge,c6id.2xlarge,c6id.4xlarge,c6id.8xlarge default_max_vcpus: 640 expanded_max_vcpus: 640 required_surplus: 0 From a4559456137d8fd6cf013f574ff02ab2b6299df2 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 13:48:29 -0800 Subject: [PATCH 55/62] fix changelog --- CHANGELOG.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e848e2da..447cad543 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,11 +4,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [7.7.3] - -### Change -- Revert ARIA hyp3 deployments back to C-instance family - including the job-spec CLI parmeter `omp-num-threads` to ensure multiple jobs fitting on single instance. - ## [7.8.0] ### Added @@ -17,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - The `SRG_GSLC` job type now runs within a GPU environment. +- Revert ARIA hyp3 deployments back to C-instance family - including the job-spec CLI parameter `omp-num-threads` to ensure multiple jobs fit on single instance. ## [7.7.2] From 12097015aa2b4c6dd39d6c8292fd3edeab07d31e Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 13:57:22 -0800 Subject: [PATCH 56/62] use dedicateed on-demand compute environment for INSAR_ISCE jobs --- job_spec/INSAR_ISCE.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index b5bafb6fc..c02ac3ea0 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -93,7 +93,9 @@ INSAR_ISCE: cost: 1.0 validators: [] compute_environment: - name: 'Default' + name: 'Aria' + allocation_type: EC2 + allocation_strategy: BEST_FIT_PROGRESSIVE tasks: - name: '' image: ghcr.io/access-cloud-based-insar/dockerizedtopsapp From fa1f6eb4b000bf5c6cffc8c3e3dfd350cddbe121 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 14:02:14 -0800 Subject: [PATCH 57/62] Add aira compute env to changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 447cad543..fde59df15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + ## [7.8.0] ### Added @@ -13,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - The `SRG_GSLC` job type now runs within a GPU environment. - Revert ARIA hyp3 deployments back to C-instance family - including the job-spec CLI parameter `omp-num-threads` to ensure multiple jobs fit on single instance. - +- Deployments with INSAR_ISCE.yml job specs will now use a dedicated compute environment with on-demand instances instead of spot instances for INSAR_ISCE jobs. ## [7.7.2] From 70952d01e0232de66a0f2d4a9cafcee0c793dbbd Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 14:07:15 -0800 Subject: [PATCH 58/62] fix instance types rendering --- apps/compute-cf.yml.j2 | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 4fcd618cc..c8f3d7836 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -123,10 +123,7 @@ Resources: AllocationStrategy: {{ strategy }} MinvCpus: 0 MaxvCpus: !Ref MaxvCpus - InstanceTypes: - {% for instance_type in instance_types %} - - {{ instance_type }} - {% endfor %} + InstanceTypes: {{ instance_type }} ImageId: {{ ami_id }} Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile @@ -146,7 +143,7 @@ Resources: - ComputeEnvironment: !Ref {{ name }}ComputeEnvironment Order: 1 SchedulingPolicyArn: !Ref SchedulingPolicy - + {% endfor %} TaskRole: From cab8fd29abadedf5386625bf60162cd0d592305b Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 14:11:09 -0800 Subject: [PATCH 59/62] fix typo --- apps/compute-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index c8f3d7836..4cd0f21d8 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -123,7 +123,7 @@ Resources: AllocationStrategy: {{ strategy }} MinvCpus: 0 MaxvCpus: !Ref MaxvCpus - InstanceTypes: {{ instance_type }} + InstanceTypes: {{ instance_types }} ImageId: {{ ami_id }} Subnets: !Ref SubnetIds InstanceRole: !Ref InstanceProfile From abec46d4b82c7b8ff2fe8bc2fc72219ecf870d64 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 29 Aug 2024 14:32:40 -0800 Subject: [PATCH 60/62] render instance types as comma-delimited list --- apps/compute-cf.yml.j2 | 2 +- job_spec/SRG_GSLC.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 4cd0f21d8..776a3c11a 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -109,7 +109,7 @@ Resources: {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set env = job_spec['compute_environment'] %} {% set name = env['name'] %} - {% set instance_types = env['instance_types'] if 'instance_types' in env else ['!Ref InstanceTypes'] %} + {% set instance_types = env['instance_types'] if 'instance_types' in env else '!Ref InstanceTypes' %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} {% set type = env['allocation_type'] if 'allocation_type' in env else 'SPOT' %} {% set strategy = env['allocation_strategy'] if 'allocation_strategy' in env else 'SPOT_PRICE_CAPACITY_OPTIMIZED' %} diff --git a/job_spec/SRG_GSLC.yml b/job_spec/SRG_GSLC.yml index fe09bd559..4f1f47428 100644 --- a/job_spec/SRG_GSLC.yml +++ b/job_spec/SRG_GSLC.yml @@ -26,8 +26,7 @@ SRG_GSLC: cost: 1.0 compute_environment: name: SrgGslc - instance_types: - - g6.2xlarge + instance_types: g6.2xlarge # Image ID for: /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended/image_id ami_id: ami-0729c079aae647cb3 tasks: From 985a803be7f8c13ee350a9a2beaab7f3973df92e Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 14:37:34 -0800 Subject: [PATCH 61/62] Update job_spec/INSAR_ISCE.yml Co-authored-by: Jake Herrmann --- job_spec/INSAR_ISCE.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job_spec/INSAR_ISCE.yml b/job_spec/INSAR_ISCE.yml index c02ac3ea0..a213eaaab 100644 --- a/job_spec/INSAR_ISCE.yml +++ b/job_spec/INSAR_ISCE.yml @@ -93,7 +93,7 @@ INSAR_ISCE: cost: 1.0 validators: [] compute_environment: - name: 'Aria' + name: 'InsarIsceAria' allocation_type: EC2 allocation_strategy: BEST_FIT_PROGRESSIVE tasks: From a1297fba45ddc7e3ee0a3c141d4552c4c06f2e40 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 29 Aug 2024 14:42:57 -0800 Subject: [PATCH 62/62] comma deliminated list --- apps/compute-cf.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/compute-cf.yml.j2 b/apps/compute-cf.yml.j2 index 776a3c11a..80e52b489 100644 --- a/apps/compute-cf.yml.j2 +++ b/apps/compute-cf.yml.j2 @@ -109,7 +109,7 @@ Resources: {% for job_type, job_spec in job_types.items() if 'Default' != job_spec['compute_environment']['name'] %} {% set env = job_spec['compute_environment'] %} {% set name = env['name'] %} - {% set instance_types = env['instance_types'] if 'instance_types' in env else '!Ref InstanceTypes' %} + {% set instance_types = env['instance_types'].split(',') if 'instance_types' in env else '!Ref InstanceTypes' %} {% set ami_id = env['ami_id'] if 'ami_id' in env else '!Ref AmiId' %} {% set type = env['allocation_type'] if 'allocation_type' in env else 'SPOT' %} {% set strategy = env['allocation_strategy'] if 'allocation_strategy' in env else 'SPOT_PRICE_CAPACITY_OPTIMIZED' %}