From 41ad400d325243342b11e1d55232b34bbd590b8c Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Wed, 4 Sep 2024 10:13:06 -0700 Subject: [PATCH] Create goldens and `diff_test` for commands for all builds in `build.py` Will use this in a followup to not require docker image without changing already existing builds PiperOrigin-RevId: 671012377 --- build_tools/ci/BUILD | 43 +++++++++++++++ build_tools/ci/build.py | 17 ++++++ build_tools/ci/generated_build_commands.txt | 61 +++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 build_tools/ci/BUILD create mode 100644 build_tools/ci/generated_build_commands.txt diff --git a/build_tools/ci/BUILD b/build_tools/ci/BUILD new file mode 100644 index 0000000000000..961e556f48fd3 --- /dev/null +++ b/build_tools/ci/BUILD @@ -0,0 +1,43 @@ +# Copyright 2024 The OpenXLA Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +load("@bazel_skylib//rules:diff_test.bzl", "diff_test") +load("//xla:pytype.default.bzl", "pytype_strict_binary") + +package( + # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], + licenses = ["notice"], +) + +pytype_strict_binary( + name = "build", + srcs = ["build.py"], +) + +genrule( + name = "generated_build_commands", + outs = ["generated_commands.txt"], + cmd = "KOKORO_JOB_NAME=GOLDENS $(location //build_tools/ci:build) > $(OUTS)", + tags = ["not_run:arm"], + tools = [":build"], +) + +diff_test( + name = "build_command_golden_test", + failure_message = """Regenerate with `KOKORO_JOB_NAME=GOLDENS PYTHONDONTWRITEBYTECODE=1 python3 build.py > generated_build_commands.txt`.""", + file1 = "generated_build_commands.txt", + file2 = ":generated_build_commands", + tags = ["not_run:arm"], +) diff --git a/build_tools/ci/build.py b/build_tools/ci/build.py index f1bb6309068e4..2d90c66593d19 100755 --- a/build_tools/ci/build.py +++ b/build_tools/ci/build.py @@ -353,10 +353,27 @@ def nvidia_gpu_build_with_compute_capability( } +def dump_all_build_commands(): + """Used to generate what commands are run for each build.""" + # Awkward workaround b/c Build instances are not hashable + type_to_build = {b.type_: b for b in _KOKORO_JOB_NAME_TO_BUILD_MAP.values()} + for t in sorted(type_to_build.keys(), key=str): + build = type_to_build[t] + sys.stdout.write(f"# BEGIN {build.type_}\n") + for cmd in build.commands(): + sys.stdout.write(" ".join(cmd) + "\n") + sys.stdout.write(f"# END {build.type_}\n") + + def main(): logging.basicConfig() logging.getLogger().setLevel(logging.INFO) kokoro_job_name = os.getenv("KOKORO_JOB_NAME") + + if kokoro_job_name == "GOLDENS": # HACK!! 
+ dump_all_build_commands() + return + build = _KOKORO_JOB_NAME_TO_BUILD_MAP[kokoro_job_name] for cmd in build.commands(): diff --git a/build_tools/ci/generated_build_commands.txt b/build_tools/ci/generated_build_commands.txt new file mode 100644 index 0000000000000..d4ff25e92b32d --- /dev/null +++ b/build_tools/ci/generated_build_commands.txt @@ -0,0 +1,61 @@ +# BEGIN BuildType.CPU_ARM64 +./github/xla/.kokoro/generate_index_html.sh index.html +docker pull us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build-arm64:jax-latest-multi-python +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build-arm64:jax-latest-multi-python bash +docker exec xla_ci bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-not_run:arm --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-not_run:arm --config=warnings --config=rbe_cross_compile_linux_arm64_xla --config=nonccl --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --build_tests_only -- //xla/... //build_tools/... @tsl//tsl/... 
+docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.CPU_ARM64 +# BEGIN BuildType.CPU_X86 +./github/xla/.kokoro/generate_index_html.sh index.html +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --config=warnings --config=nonccl --config=rbe_linux_cpu --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @tsl//tsl/... +docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.CPU_X86 +# BEGIN BuildType.GPU +./github/xla/.kokoro/generate_index_html.sh index.html +nvidia-smi +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/xla gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-amd --config=warnings --config=rbe_linux_cuda_nvcc --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... 
@tsl//tsl/... +docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.GPU +# BEGIN BuildType.JAX_CPU +./github/xla/.kokoro/generate_index_html.sh index.html +git clone --depth=1 https://github.com/google/jax ./github/jax +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/jax gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters= --test_tag_filters= --config=avx_posix --config=mkl_open_source_only --config=rbe_cpu_linux_py3.12 --config=tensorflow_testing_rbe_linux --test_env=JAX_NUM_GENERATED_CASES=25 --test_env=JAX_SKIP_SLOW_TESTS=1 --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --override_repository=xla=/github/xla -- //tests:cpu_tests //tests:backend_independent_tests +docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.JAX_CPU +# BEGIN BuildType.JAX_GPU +./github/xla/.kokoro/generate_index_html.sh index.html +git clone --depth=1 https://github.com/google/jax ./github/jax +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/jax gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters=-multiaccelerator --test_tag_filters=-multiaccelerator --config=avx_posix --config=mkl_open_source_only --config=rbe_linux_cuda12.3_nvcc_py3.10 --config=tensorflow_testing_rbe_linux --test_env=JAX_SKIP_SLOW_TESTS=1 --test_env=TF_CPP_MIN_LOG_LEVEL=0 --test_env=JAX_EXCLUDE_TEST_TARGETS=PmapTest.testSizeOverflow --test_output=errors --verbose_failures --keep_going --nobuild_tests_only 
--profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --override_repository=xla=/github/xla -- //tests:gpu_tests //tests:backend_independent_tests +docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.JAX_GPU +# BEGIN BuildType.TENSORFLOW_CPU +./github/xla/.kokoro/generate_index_html.sh index.html +git clone --depth=1 https://github.com/tensorflow/tensorflow ./github/tensorflow +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/tensorflow gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters= --test_tag_filters= --config=release_cpu_linux --config=rbe_linux_cpu --config=linux_cpu_pycpp_test_filters --verbose_failures --test_output=errors --override_repository=xla=/github/xla --profile=profile.json.gz -- //tensorflow/compiler/... -//tensorflow/compiler/tf2tensorrt/... //tensorflow/python/... -//tensorflow/python/distribute/... -//tensorflow/python/compiler/tensorrt/... 
+docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.TENSORFLOW_CPU +# BEGIN BuildType.TENSORFLOW_GPU +./github/xla/.kokoro/generate_index_html.sh index.html +git clone --depth=1 https://github.com/tensorflow/tensorflow ./github/tensorflow +parallel --ungroup --retries 3 --delay 15 docker pull ::: gcr.io/tensorflow-sigs/build:latest-python3.11 +docker run --detach --name=xla_ci --rm --interactive --tty --volume=./github:/github --workdir=/github/tensorflow gcr.io/tensorflow-sigs/build:latest-python3.11 bash +docker exec xla_ci bazel test --build_tag_filters=-no_oss,+gpu --test_tag_filters=-no_oss,+gpu --config=release_gpu_linux --config=rbe_linux_cuda --config=linux_cuda_pycpp_test_filters --verbose_failures --test_output=errors --override_repository=xla=/github/xla --profile=profile.json.gz -- //tensorflow/compiler/... -//tensorflow/compiler/tf2tensorrt/... //tensorflow/python/... -//tensorflow/python/distribute/... -//tensorflow/python/compiler/tensorrt/... +docker exec xla_ci bazel analyze-profile profile.json.gz +docker stop xla_ci +# END BuildType.TENSORFLOW_GPU