diff --git a/README.md b/README.md index ef1b04c..21584ce 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # dsub: simple batch jobs with Docker -[![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](https://github.com/DataBiosphere/dsub/blob/master/LICENSE) +[![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](https://github.com/DataBiosphere/dsub/blob/main/LICENSE) ## Overview @@ -52,7 +52,7 @@ To deactivate the virtual environment in your shell, run the command: Alternatively, a set of convenience scripts are provided that activate the virutalenv before calling `dsub`, `dstat`, and `ddel`. They are in the -[bin](https://github.com/DataBiosphere/dsub/tree/master/bin) directory. You can +[bin](https://github.com/DataBiosphere/dsub/tree/main/bin) directory. You can use these scripts if you don't want to activate the virtualenv explicitly in your shell. @@ -102,7 +102,7 @@ Choose one of the following: ### Makefile After cloning the dsub repo, you can also use the -[Makefile](https://github.com/DataBiosphere/dsub/blob/master/Makefile) +[Makefile](https://github.com/DataBiosphere/dsub/blob/main/Makefile) by running: make @@ -243,7 +243,7 @@ implements a consistent runtime environment. The current providers are: - google-cls-v2 (*new*) More details on the runtime environment implemented by the backend providers -can be found in [dsub backend providers](https://github.com/DataBiosphere/dsub/blob/master/docs/providers/README.md). +can be found in [dsub backend providers](https://github.com/DataBiosphere/dsub/blob/main/docs/providers/README.md). 
### Differences between `google-v2` and `google-cls-v2` @@ -315,7 +315,7 @@ Note: your `--image` must include the For more information on using the `--image` flag, see the -[image section in Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/master/docs/code.md#--image-docker-image) +[image section in Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/main/docs/code.md#--image-docker-image) ### Passing parameters to your script @@ -335,7 +335,7 @@ environment variable, as `${MESSAGE}`. **Be sure to enclose your command string in single quotes and not double quotes. If you use double quotes, the command will be expanded in your local shell before being passed to dsub. For more information on using the -`--command` flag, see [Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/master/docs/code.md)** +`--command` flag, see [Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/main/docs/code.md)** To set multiple environment variables, you can repeat the flag: @@ -356,7 +356,7 @@ the cloud storage bucket path. Paths can be: * folder paths like `gs://my-bucket/my-folder` * wildcard paths like `gs://my-bucket/my-folder/*` -See the [inputs and outputs](https://github.com/DataBiosphere/dsub/blob/master/docs/input_output.md) +See the [inputs and outputs](https://github.com/DataBiosphere/dsub/blob/main/docs/input_output.md) documentation for more details. ### Transferring input files to a Google Cloud Storage bucket. @@ -484,7 +484,7 @@ your local machine. `dsub` tasks run using the `google`, `google-v2`, or `google-cls-v2` providers can take advantage of a wide range of CPU, RAM, disk, and hardware accelerator (eg. GPU) options. -See the [Compute Resources](https://github.com/DataBiosphere/dsub/blob/master/docs/compute_resources.md) +See the [Compute Resources](https://github.com/DataBiosphere/dsub/blob/main/docs/compute_resources.md) documentation for details. 
### Submitting a batch job @@ -549,17 +549,17 @@ The task range values can take any of the following forms: ### Logging The `--logging` flag points to a location for `dsub` task log files. For details -on how to specify your logging path, see [Logging](https://github.com/DataBiosphere/dsub/blob/master/docs/logging.md). +on how to specify your logging path, see [Logging](https://github.com/DataBiosphere/dsub/blob/main/docs/logging.md). ### Job control It's possible to wait for a job to complete before starting another. -For details, see [job control with dsub](https://github.com/DataBiosphere/dsub/blob/master/docs/job_control.md). +For details, see [job control with dsub](https://github.com/DataBiosphere/dsub/blob/main/docs/job_control.md). ### Retries It is possible for `dsub` to automatically retry failed tasks. -For details, see [retries with dsub](https://github.com/DataBiosphere/dsub/blob/master/docs/retries.md). +For details, see [retries with dsub](https://github.com/DataBiosphere/dsub/blob/main/docs/retries.md). ### Labeling jobs and tasks @@ -568,7 +568,7 @@ cancel tasks using your own identifiers. In addition, with the Google providers, labeling a task will label associated compute resources such as virtual machines and disks. -For more details, see [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/master/docs/troubleshooting.md) +For more details, see [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/main/docs/troubleshooting.md) ### Viewing job status @@ -599,12 +599,12 @@ each job includes: gets a sequential value of the form "task-*n*" where *n* is 1-based. 
Note that the job metadata values will be modified to conform with the "Label -Restrictions" listed in the [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/master/docs/troubleshooting.md) +Restrictions" listed in the [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/main/docs/troubleshooting.md) guide. Metadata can be used to cancel a job or individual tasks within a batch job. -For more details, see [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/master/docs/troubleshooting.md) +For more details, see [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/main/docs/troubleshooting.md) #### Summarizing job status @@ -667,8 +667,7 @@ The image below illustrates this: ![Pipelines Runner Architecture](./docs/images/pipelines_runner_architecture.png) -By default, `dsub` will use the [default Compute Engine service account] -(https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) +By default, `dsub` will use the [default Compute Engine service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) as the authorized service account on the VM instance. You can choose to specify the email address of another service acount using `--service-account`. @@ -716,19 +715,19 @@ of the service account will be `sa-name@project-id.iam.gserviceaccount.com`. 
* See the examples: - * [Custom scripts](https://github.com/DataBiosphere/dsub/tree/master/examples/custom_scripts) - * [Decompress files](https://github.com/DataBiosphere/dsub/tree/master/examples/decompress) - * [FastQC](https://github.com/DataBiosphere/dsub/tree/master/examples/fastqc) - * [Samtools index](https://github.com/DataBiosphere/dsub/tree/master/examples/samtools) + * [Custom scripts](https://github.com/DataBiosphere/dsub/tree/main/examples/custom_scripts) + * [Decompress files](https://github.com/DataBiosphere/dsub/tree/main/examples/decompress) + * [FastQC](https://github.com/DataBiosphere/dsub/tree/main/examples/fastqc) + * [Samtools index](https://github.com/DataBiosphere/dsub/tree/main/examples/samtools) * See more documentation for: - * [Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/master/docs/code.md) - * [Input and Output File Handling](https://github.com/DataBiosphere/dsub/blob/master/docs/input_output.md) - * [Logging](https://github.com/DataBiosphere/dsub/blob/master/docs/logging.md) - * [Compute Resources](https://github.com/DataBiosphere/dsub/blob/master/docs/compute_resources.md) - * [Compute Quotas](https://github.com/DataBiosphere/dsub/blob/master/docs/compute_quotas.md) - * [Job Control](https://github.com/DataBiosphere/dsub/blob/master/docs/job_control.md) - * [Retries](https://github.com/DataBiosphere/dsub/blob/master/docs/retries.md) - * [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/master/docs/troubleshooting.md) - * [Backend providers](https://github.com/DataBiosphere/dsub/blob/master/docs/providers/README.md) + * [Scripts, Commands, and Docker](https://github.com/DataBiosphere/dsub/blob/main/docs/code.md) + * [Input and Output File Handling](https://github.com/DataBiosphere/dsub/blob/main/docs/input_output.md) + * [Logging](https://github.com/DataBiosphere/dsub/blob/main/docs/logging.md) + * [Compute 
Resources](https://github.com/DataBiosphere/dsub/blob/main/docs/compute_resources.md) + * [Compute Quotas](https://github.com/DataBiosphere/dsub/blob/main/docs/compute_quotas.md) + * [Job Control](https://github.com/DataBiosphere/dsub/blob/main/docs/job_control.md) + * [Retries](https://github.com/DataBiosphere/dsub/blob/main/docs/retries.md) + * [Checking Status and Troubleshooting Jobs](https://github.com/DataBiosphere/dsub/blob/main/docs/troubleshooting.md) + * [Backend providers](https://github.com/DataBiosphere/dsub/blob/main/docs/providers/README.md) diff --git a/docs/code.md b/docs/code.md index f8f0cdb..1ba14ef 100644 --- a/docs/code.md +++ b/docs/code.md @@ -3,7 +3,7 @@ `dsub` provides a few choices for how to get your code into the Docker container in order to run: -* --command "shell string" +* --command 'shell string' * --script "script file (Bash, Python, etc.)" * --image "Docker image" * --input "path to file in cloud storage" diff --git a/docs/compute_quotas.md b/docs/compute_quotas.md index 0c6a22a..7e013fa 100644 --- a/docs/compute_quotas.md +++ b/docs/compute_quotas.md @@ -126,7 +126,7 @@ Most commonly, the quotas relevant for `dsub` tasks are: > **_NOTE:_** To eliminate dependence on the `In-use IP addresses` quota, > the Google providers support the `--use-private-address` flag. > See the `Public IP addresses` section of -> [Compute Resources](https://github.com/DataBiosphere/dsub/blob/master/docs/compute_resources.md). +> [Compute Resources](https://github.com/DataBiosphere/dsub/blob/main/docs/compute_resources.md). 
## Troubleshooting Quota issues diff --git a/docs/compute_resources.md b/docs/compute_resources.md index 1e09200..1e8a9c4 100644 --- a/docs/compute_resources.md +++ b/docs/compute_resources.md @@ -92,7 +92,7 @@ For more information on Compute Engine IP addresses, see: Running the job on VMs without a public IP address has the advantage that it does not consume `In-use IP addresses` quota, which can otherwise limit your ability to scale up your concurrently running tasks. -See the [Compute Quotas](https://github.com/DataBiosphere/dsub/blob/master/docs/compute_quotas.md) +See the [Compute Quotas](https://github.com/DataBiosphere/dsub/blob/main/docs/compute_quotas.md) documentation for more details. Running jobs on VMs without a public IP address requires the following: diff --git a/docs/providers/README.md b/docs/providers/README.md index 1fded51..711dd66 100644 --- a/docs/providers/README.md +++ b/docs/providers/README.md @@ -341,6 +341,9 @@ The following `dsub` parameters are specific to the `google-v2` and - `--use-private-address`: - If set to true, do not attach a public IP address to the VM. (default: False) + - `--block-external-network`: + - If set to true, prevents the container for the user's script/command + from accessing the external network. (default: False) - Per-task compute resources - `--boot-disk-size`: diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 698243e..cd74974 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -348,7 +348,7 @@ $ dstat \ ## Viewing logs Each `dsub` task produces log files whose destination is determined by the `--logging` flag. -See [Logging](https://github.com/DataBiosphere/dsub/blob/master/docs/logging.md) +See [Logging](https://github.com/DataBiosphere/dsub/blob/main/docs/logging.md) for more information. 
## SSH to the VM diff --git a/dsub/_dsub_version.py b/dsub/_dsub_version.py index dc93142..c1c51da 100644 --- a/dsub/_dsub_version.py +++ b/dsub/_dsub_version.py @@ -26,4 +26,4 @@ 0.1.3.dev0 -> 0.1.3 -> 0.1.4.dev0 -> ... """ -DSUB_VERSION = '0.4.3' +DSUB_VERSION = '0.4.4' diff --git a/dsub/commands/dsub.py b/dsub/commands/dsub.py index ae5780d..23b24de 100644 --- a/dsub/commands/dsub.py +++ b/dsub/commands/dsub.py @@ -564,6 +564,13 @@ def _parse_arguments(prog, argv): action='store_true', help="""If set to true, enables Stackdriver monitoring on the VM. (default: False)""") + google_common.add_argument( + '--block-external-network', + default=False, + action='store_true', + help="""If set to true, prevents the container for the user's + script/command from accessing the external network. + (default: False)""") google_cls_v2 = parser.add_argument_group( title='"google-cls-v2" provider options', @@ -633,7 +640,8 @@ def _get_job_resources(args): ssh=args.ssh, enable_stackdriver_monitoring=args.enable_stackdriver_monitoring, max_retries=args.retries, - max_preemptible_attempts=args.preemptible) + max_preemptible_attempts=args.preemptible, + block_external_network=args.block_external_network) def _get_job_metadata(provider, user_id, job_name, script, task_ids, diff --git a/dsub/lib/job_model.py b/dsub/lib/job_model.py index b5bd5a8..c262248 100644 --- a/dsub/lib/job_model.py +++ b/dsub/lib/job_model.py @@ -436,6 +436,7 @@ class Resources( 'enable_stackdriver_monitoring', 'max_retries', 'max_preemptible_attempts', + 'block_external_network', ])): """Job resource parameters related to CPUs, memory, and disk. @@ -473,6 +474,8 @@ class Resources( max_preemptible_attempts (param_util.PreemptibleParam): Int representing maximum allowed number of attempts on a preemptible machine, or boolean representing always preemtible. + block_external_network (bool): Prevents the containers from accessing the + external network. 
""" __slots__ = () @@ -503,7 +506,8 @@ def __new__(cls, ssh=None, enable_stackdriver_monitoring=None, max_retries=None, - max_preemptible_attempts=None): + max_preemptible_attempts=None, + block_external_network=None): return super(Resources, cls).__new__(cls, min_cores, min_ram, machine_type, disk_size, disk_type, boot_disk_size, preemptible, image, @@ -512,7 +516,8 @@ def __new__(cls, subnetwork, use_private_address, accelerator_type, accelerator_count, nvidia_driver_version, timeout, log_interval, ssh, enable_stackdriver_monitoring, - max_retries, max_preemptible_attempts) + max_retries, max_preemptible_attempts, + block_external_network) def ensure_job_params_are_complete(job_params): diff --git a/dsub/providers/google_base.py b/dsub/providers/google_base.py index d232a32..32ef403 100644 --- a/dsub/providers/google_base.py +++ b/dsub/providers/google_base.py @@ -21,6 +21,8 @@ import re import warnings +import google.auth +from google.oauth2 import service_account import googleapiclient.discovery import googleapiclient.errors from ..lib import job_model @@ -28,9 +30,6 @@ import pytz import tenacity -import google.auth -from google.oauth2 import service_account - # The google v1 provider directly added the bigquery scope, but the v1alpha2 # API automatically added: @@ -86,12 +85,18 @@ 'asia-northeast2-a', 'asia-northeast2-b', 'asia-northeast2-c', + 'asia-northeast3-a', + 'asia-northeast3-b', + 'asia-northeast3-c', 'asia-south1-a', 'asia-south1-b', 'asia-south1-c', 'asia-southeast1-a', 'asia-southeast1-b', 'asia-southeast1-c', + 'asia-southeast2-a', + 'asia-southeast2-b', + 'asia-southeast2-c', 'australia-southeast1-a', 'australia-southeast1-b', 'australia-southeast1-c', @@ -135,6 +140,12 @@ 'us-west2-a', 'us-west2-b', 'us-west2-c', + 'us-west3-a', + 'us-west3-b', + 'us-west3-c', + 'us-west4-a', + 'us-west4-b', + 'us-west4-c', ] diff --git a/dsub/providers/google_v2_base.py b/dsub/providers/google_v2_base.py index f231ab2..d5fe1ad 100644 --- 
a/dsub/providers/google_v2_base.py +++ b/dsub/providers/google_v2_base.py @@ -824,6 +824,7 @@ def _build_pipeline_request(self, task_view): google_v2_pipelines.build_action( name='user-command', pid_namespace=pid_namespace, + block_external_network=job_resources.block_external_network, image_uri=job_resources.image, mounts=[mnt_datadisk] + persistent_disk_mounts, environment=user_environment, @@ -1510,6 +1511,10 @@ def get_field(self, field, default=None): # The ssh flag is determined by if an action named 'ssh' exists. value['ssh'] = self._is_ssh_enabled(self._op) + value[ + 'block-external-network'] = google_v2_operations.external_network_blocked( + self._op) + # The VM instance name and zone can be found in the WorkerAssignedEvent. # For a given operation, this may have occurred multiple times, so be # sure to grab the most recent. diff --git a/dsub/providers/google_v2_operations.py b/dsub/providers/google_v2_operations.py index f4214bc..a001e22 100644 --- a/dsub/providers/google_v2_operations.py +++ b/dsub/providers/google_v2_operations.py @@ -158,6 +158,21 @@ def get_last_event(op): return None +def external_network_blocked(op): + """Return True if the blockExternalNetwork flag is set for the user action.""" + user_action = get_action_by_name(op, 'user-command') + if user_action: + if _API_VERSION == google_v2_versions.V2ALPHA1: + flags = user_action.get('flags') + if flags: + return 'BLOCK_EXTERNAL_NETWORK' in flags + elif _API_VERSION == google_v2_versions.V2BETA: + return user_action.get('blockExternalNetwork') + else: + assert False, 'Unexpected version: {}'.format(_API_VERSION) + return False + + def is_unexpected_exit_status_event(e): """Retun True if the event is for an unexpected exit status.""" if _API_VERSION == google_v2_versions.V2ALPHA1: diff --git a/dsub/providers/google_v2_pipelines.py b/dsub/providers/google_v2_pipelines.py index b2b0c6a..c34bbcf 100644 --- a/dsub/providers/google_v2_pipelines.py +++ b/dsub/providers/google_v2_pipelines.py @@ 
-166,7 +166,8 @@ def build_action(name=None, labels=None, always_run=None, enable_fuse=None, - run_in_background=None): + run_in_background=None, + block_external_network=None): """Build an Action object for a Pipeline request. Args: @@ -183,6 +184,8 @@ def build_action(name=None, always_run (bool): Action must run even if pipeline has already failed. enable_fuse (bool): Enable access to the FUSE device for this action. run_in_background (bool): Allow the action to run in the background. + block_external_network (bool): Prevents the container from accessing the + external network. Returns: An object representing an Action resource. @@ -210,6 +213,8 @@ def build_action(name=None, flags.append('ENABLE_FUSE') if run_in_background: flags.append('RUN_IN_BACKGROUND') + if block_external_network: + flags.append('BLOCK_EXTERNAL_NETWORK') if flags: action['flags'] = flags @@ -221,6 +226,7 @@ def build_action(name=None, action['alwaysRun'] = always_run action['enableFuse'] = enable_fuse action['runInBackground'] = run_in_background + action['blockExternalNetwork'] = block_external_network else: assert False, 'Unexpected version: {}'.format(_API_VERSION) diff --git a/dsub/providers/local/runner.sh b/dsub/providers/local/runner.sh index cd4a5ac..d88fa0a 100644 --- a/dsub/providers/local/runner.sh +++ b/dsub/providers/local/runner.sh @@ -94,7 +94,9 @@ function delocalize_logs_function() { readonly -f delocalize_logs_function function get_timestamp() { - python \ + # Using Python instead of /usr/bin/date because the MacOS version cannot get + # microsecond precision in the format. 
+ "${PYTHON}" \ -c 'import datetime; print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"))' } readonly -f get_timestamp @@ -224,7 +226,11 @@ function exit_if_canceled() { readonly -f exit_if_canceled # Begin main execution -write_event "start" +PYTHON="$(which python3 || which python)" +if [[ -z "${PYTHON}" ]]; then + 1>&2 echo "ERROR: Could not find python executable" + exit 1 +fi # Trap errors and handle them instead of using errexit set +o errexit @@ -237,6 +243,8 @@ trap 'error ${LINENO} $? "Exit (undefined variable or kill?)"' EXIT # Make sure that ERR traps are inherited by shell functions set -o errtrace +write_event "start" + # Handle gcr.io images write_event "pulling-image" fetch_image_if_necessary "${IMAGE}" diff --git a/setup.py b/setup.py index ae2165a..deb0bc8 100644 --- a/setup.py +++ b/setup.py @@ -15,30 +15,30 @@ # dependencies for dsub, ddel, dstat # Pin to known working versions to prevent episodic breakage from library # version mismatches. - # This version list generated: 07/14/2020 + # This version list generated: 02/01/2021 # direct dependencies - 'google-api-python-client<=1.8.3', - 'google-auth<=1.18.0', + 'google-api-python-client<=1.12.8', + 'google-auth<=1.24.0', 'python-dateutil<=2.8.1', - 'pytz<=2019.3', - 'pyyaml<=5.3', + 'pytz<=2021.1', + 'pyyaml<=5.4.1', 'tenacity<=5.0.4', 'tabulate<=0.8.7', # downstream dependencies 'funcsigs<=1.0.2', - 'google-api-core<=1.21.0', - 'google-auth-httplib2<=0.0.3', - 'httplib2<=0.18.1', + 'google-api-core<=1.25.1', + 'google-auth-httplib2<=0.0.4', + 'httplib2<=0.19.0', 'pyasn1<=0.4.8', 'pyasn1-modules<=0.2.8', - 'rsa<=4.0', + 'rsa<=4.7', 'uritemplate<=3.0.1', # dependencies for test code - 'parameterized<=0.7.4', - 'mock<=3.0.5', + 'parameterized<=0.8.1', + 'mock<=4.0.3', ] if sys.version_info[0] == 2: diff --git a/test/integration/e2e_block_external_network.google-v2.sh b/test/integration/e2e_block_external_network.google-v2.sh new file mode 100755 index 0000000..feb4889 --- /dev/null +++ 
b/test/integration/e2e_block_external_network.google-v2.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# Copyright 2021 Verily Life Sciences Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset + +# Basic test of using the --block-external-network flag +# No input files. +# No output files. +# The stderr log file is checked for expected errors due to no network. + +readonly SCRIPT_DIR="$(dirname "${0}")" + +# Do standard test setup +source "${SCRIPT_DIR}/test_setup_e2e.sh" + +echo "Launching pipeline..." + +set +o errexit + +# Run gsutil with Boto:num_retries=0 option. Otherwise, gsutil will retry up to +# 24 times due to the network error +# https://stackoverflow.com/questions/44459685/sql-server-agent-job-and-gsutil +JOB_ID="$(run_dsub \ + --image 'gcr.io/google.com/cloudsdktool/cloud-sdk:327.0.0-slim' \ + --block-external-network \ + --script "${SCRIPT_DIR}/script_block_external_network.sh" \ + --retries 1 \ + --wait)" +if [[ $? -eq 0 ]]; then + 1>&2 echo "dsub did not report the failure as it should have." + exit 1 +fi +set -o errexit + +echo +echo "Checking stderr of both attempts..." + +# Check the results +readonly ATTEMPT_1_STDERR_LOG="$(dirname "${LOGGING}")/${TEST_NAME}.1-stderr.log" +readonly ATTEMPT_2_STDERR_LOG="$(dirname "${LOGGING}")/${TEST_NAME}.2-stderr.log" + +for STDERR_LOG_FILE in "${ATTEMPT_1_STDERR_LOG}" "${ATTEMPT_2_STDERR_LOG}" ; do + RESULT="$(gsutil cat "${STDERR_LOG_FILE}")" + if ! 
echo "${RESULT}" | grep -qi "Unable to find the server at storage.googleapis.com"; then + 1>&2 echo "Network error from gsutil not found in the dsub stderr log!" + 1>&2 echo "${RESULT}" + exit 1 + fi + + if ! echo "${RESULT}" | grep -qi "Could not resolve host: google.com"; then + 1>&2 echo "Network error from curl not found in the dsub stderr log!" + 1>&2 echo "${RESULT}" + exit 1 + fi +done + +echo +echo "Checking dstat output..." +ATTEMPT_1_DSTAT_OUTPUT=$(run_dstat --attempts 1 --status 'FAILURE' --full --jobs "${JOB_ID}" 2>&1); +ATTEMPT_2_DSTAT_OUTPUT=$(run_dstat --attempts 2 --status 'FAILURE' --full --jobs "${JOB_ID}" 2>&1); +for DSTAT_OUTPUT in "${ATTEMPT_1_DSTAT_OUTPUT}" "${ATTEMPT_2_DSTAT_OUTPUT}" ; do + if ! echo "${DSTAT_OUTPUT}" | grep -qi "block-external-network: true"; then + 1>&2 echo "block-external-network not found in dstat output!" + 1>&2 echo "${DSTAT_OUTPUT}" + exit 1 + fi +done + +echo +echo "stderr log contains the expected errors." +echo "dstat output contains the expected block-external-network flag." +echo "SUCCESS" + diff --git a/test/integration/script_block_external_network.sh b/test/integration/script_block_external_network.sh new file mode 100755 index 0000000..a2b20e5 --- /dev/null +++ b/test/integration/script_block_external_network.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Copyright 2021 Verily Life Sciences Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Intended to be used by e2e_block_external_network.sh + +set -o errexit +set -o nounset + +RC=0 + +if ! gsutil -o 'Boto:num_retries=0' ls gs://genomics-public-data; then + 1>&2 echo "\`gsutil ls\` should not have succeeded" + RC=1 +fi + +if ! curl google.com; then + 1>&2 echo "\`curl google.com\` should not have succeeded" + RC=1 +fi + +exit "${RC}" diff --git a/test/integration/unit_flags.google-v2.sh b/test/integration/unit_flags.google-v2.sh index 9885430..48dd4f2 100755 --- a/test/integration/unit_flags.google-v2.sh +++ b/test/integration/unit_flags.google-v2.sh @@ -29,7 +29,7 @@ source "${SCRIPT_DIR}/test_setup_unit.sh" function call_dsub() { local image="${DOCKER_IMAGE_OVERRIDE:-dummy-image}" - + dsub \ --provider "${DSUB_PROVIDER}" \ --project "${PROJECT_ID}" \ @@ -758,6 +758,53 @@ function test_no_stackdriver() { } readonly -f test_no_stackdriver +function test_block_external_network() { + local subtest="${FUNCNAME[0]}" + + if call_dsub \ + --command 'echo "${TEST_NAME}"' \ + --regions us-central1 \ + --block-external-network; then + + # Check that the output contains expected values + if [[ "${DSUB_PROVIDER}" == "google-cls-v2" ]]; then + assert_err_value_equals \ + "[0].pipeline.actions.[3].blockExternalNetwork" "True" + elif [[ "${DSUB_PROVIDER}" == "google-v2" ]]; then + assert_err_value_equals \ + "[0].pipeline.actions.[3].flags.[0]" "BLOCK_EXTERNAL_NETWORK" + fi + + test_passed "${subtest}" + else + test_failed "${subtest}" + fi +} +readonly -f test_block_external_network + +function test_no_block_external_network() { + local subtest="${FUNCNAME[0]}" + + if call_dsub \ + --command 'echo "${TEST_NAME}"' \ + --regions us-central1; then + + # Check that the output does not contain block network flag + if [[ "${DSUB_PROVIDER}" == "google-cls-v2" ]]; then + assert_err_value_equals \ + "[0].pipeline.actions.[3].blockExternalNetwork" "False" + elif [[ "${DSUB_PROVIDER}" == "google-v2" ]]; then + assert_err_not_contains \ + "BLOCK_EXTERNAL_NETWORK" + fi + 
+ test_passed "${subtest}" + else + test_failed "${subtest}" + fi +} +readonly -f test_no_block_external_network + # Run the tests trap "exit_handler" EXIT @@ -827,3 +874,7 @@ test_no_disk_type echo test_stackdriver test_no_stackdriver + +echo +test_block_external_network +test_no_block_external_network