Skip to content

Commit

Permalink
Merge branch 'master' into java-csv-json
Browse files Browse the repository at this point in the history
  • Loading branch information
robertwb committed Sep 22, 2023
2 parents f0322f8 + c5e6c79 commit 2005dff
Show file tree
Hide file tree
Showing 435 changed files with 17,557 additions and 3,824 deletions.
2 changes: 2 additions & 0 deletions .github/actions/setup-action/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,5 @@ runs:
shell: bash
run: |
echo KUBELET_GCLOUD_CONFIG_PATH=/var/lib/kubelet/pods/$POD_UID/volumes/kubernetes.io~empty-dir/gcloud >> $GITHUB_ENV
- name: Setup environment
uses: ./.github/actions/setup-environment-action
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,42 @@
# specific language governing permissions and limitations
# under the License.

name: 'Setup environment for self-hosted runners'
description: 'Setup action to run jobs in a self-hosted runner'
name: 'Setup environment action'
description: 'Setup environment to run jobs'
inputs:
requires-py-38:
python-version:
required: false
description: 'Set as false if does not require py38 setup'
default: 'true'
requires-py-39:
description: 'Install Python version'
default: ''
java-version:
required: false
description: 'Set as false if does not require py39 setup'
default: 'true'
requires-java-8:
description: 'Install Java version'
default: ''
go-version:
required: false
description: 'Set as false if does not require java-8 setup'
default: 'true'
requires-go:
required: false
description: 'Set as false if does not require go setup'
default: 'true'
description: 'Install Go version'
default: ''

runs:
using: "composite"
steps:
- name: Install python 3.8
if: ${{ inputs.requires-py-38 == 'true' }}
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install python 3.9
if: ${{ inputs.requires-py-39 == 'true' }}
- name: Install Python
if: ${{ inputs.python-version != '' }}
uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Set Java Version
if: ${{ inputs.requires-java-8 == 'true' }}
python-version: ${{ inputs.python-version }}
- name: Install Java
if: ${{ inputs.java-version != '' }}
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: 8
- name: Set Go Version
if: ${{ inputs.requires-go == 'true' }}
java-version: ${{ inputs.java-version }}
- name: Setup Gradle
uses: gradle/gradle-build-action@v2
with:
cache-read-only: false
- name: Install Go
if: ${{ inputs.go-version != '' }}
uses: actions/setup-go@v3
with:
go-version: '1.21' # never set patch, to get latest patch releases.
go-version: ${{ inputs.go-version }} # never set patch, to get latest patch releases.
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ main_runner = {
runner_image = "us-central1-docker.pkg.dev/apache-beam-testing/beam-github-actions/beam-arc-runner:60d397ecfbd2b10a1929615c70d500eb71a2c053"
machine_type = "e2-standard-16"
min_node_count = "1"
max_node_count = "16"
max_node_count = "24"
min_replicas = "1"
max_replicas = "128"
max_replicas = "200"
webhook_scaling = true
disk_size_gb = 200
requests = {
Expand Down
8 changes: 7 additions & 1 deletion .github/gh-actions-self-hosted-runners/arc/images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,15 @@ RUN curl -OL https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-c
rm google-cloud-sdk-367.0.0-linux-x86_64.tar.gz && \
mv google-cloud-sdk /usr/local/google-cloud-sdk && \
/usr/local/google-cloud-sdk/install.sh --quiet && \
/usr/local/google-cloud-sdk/bin/gcloud components install kubectl && \
/usr/local/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin && \
#revert permission
chown -R runner:runner /home/runner/.config
ENV USE_GKE_GCLOUD_AUTH_PLUGIN=True
ENV PATH="${PATH}:/usr/local/google-cloud-sdk/bin"
#Install Kubectl
RUN curl -OL https://dl.k8s.io/release/v1.28.1/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /usr/local/bin/kubectl
#Install Apache Maven
RUN curl -OL https://dlcdn.apache.org/maven/maven-3/3.9.4/binaries/apache-maven-3.9.4-bin.tar.gz && \
tar -xvf apache-maven-3.9.4-bin.tar.gz && \
Expand All @@ -73,4 +78,5 @@ ENV MAVEN_HOME="/usr/local/maven"

# Needed to transfer path additions to runner environment
RUN echo PATH=$PATH >> /runnertmp/.env
RUN echo USE_GKE_GCLOUD_AUTH_PLUGIN=$USE_GKE_GCLOUD_AUTH_PLUGIN >> /runnertmp/.env
USER runner
108 changes: 97 additions & 11 deletions .github/workflows/README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion .github/workflows/assign_milestone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
issues: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 2

Expand Down
84 changes: 84 additions & 0 deletions .github/workflows/beam_CancelStaleDataflowJobs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workflow-level configuration: display name, triggers, concurrency,
# shared environment, and token permissions.
name: Cancel Stale Dataflow Jobs

# Triggers: manual dispatch, a cron schedule (minute 0 of every 4th hour),
# or any newly created issue / pull-request comment. The job-level `if`
# decides whether a given comment actually starts work.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 */4 * * *"
  issue_comment:
    types:
      - created

# A newer run in the same group cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}"

# Gradle Enterprise credentials, exposed to every step from repository secrets.
env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}

# Explicit permissions, so the workflow never falls back to the default token
# permissions (which are `write-all` for pull_request_target events).
# Only `actions` is writable; `id-token` is disabled.
permissions:
  actions: write
  checks: read
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: read
  packages: read
  pages: read
  pull-requests: read
  repository-projects: read
  security-events: read
  statuses: read

jobs:
  # Single job: authenticate against GCP, then run the Gradle task
  # `:beam-test-tools:cancelStaleDataflowJobs`.
  beam_CancelStaleDataflowJobs:
    # Run for scheduled and manual triggers, or when a comment body is
    # exactly the trigger phrase.
    if: |
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Cancel Stale Dataflow Jobs'
    name: "${{matrix.job_name}} (${{matrix.job_phrase}})"
    runs-on:
      - self-hosted
      - ubuntu-20.04
      - main
    timeout-minutes: 120
    # One-element matrix; it exists only to provide the job name and the
    # trigger phrase as reusable values.
    strategy:
      matrix:
        job_name:
          - beam_CancelStaleDataflowJobs
        job_phrase:
          - Run Cancel Stale Dataflow Jobs
    steps:
      - uses: actions/checkout@v4
      # Repository-local composite action; receives the trigger phrase,
      # the workflow token, and the rendered job name.
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      # Service-account key and project id both come from repository secrets.
      - name: Authenticate on GCP
        id: auth
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}
          project_id: ${{ secrets.GCP_PROJECT_ID }}
      - name: run cancel stale dataflow jobs
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: ":beam-test-tools:cancelStaleDataflowJobs"

126 changes: 126 additions & 0 deletions .github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow-level configuration: display name, triggers, token permissions,
# concurrency, and shared environment.
name: Load Tests CoGBK Dataflow Streaming Java

# Triggers: manual dispatch, a daily cron at 10:50, or any newly created
# issue / pull-request comment. The job-level `if` filters comments by phrase.
on:
  workflow_dispatch:
  schedule:
    - cron: "50 10 * * *"
  issue_comment:
    types:
      - created

# Explicit permissions, so the workflow never falls back to the default token
# permissions (which are `write-all` for pull_request_target events).
# Writable scopes: actions, checks, issues, pull-requests. `id-token` is disabled.
permissions:
  actions: write
  checks: write
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: write
  packages: read
  pages: read
  pull-requests: write
  repository-projects: read
  security-events: read
  statuses: read

# A newer run in the same group cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}"

# Gradle Enterprise credentials, exposed to every step from repository secrets.
env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}

jobs:
  # Runs four CoGroupByKey load tests one after another, each driven by its
  # own config file under .github/workflows/load-tests-job-configs/, then
  # archives and publishes the JUnit results.
  beam_LoadTests_Java_CoGBK_Dataflow_Streaming:
    # Run for manual and scheduled triggers, or when a comment body is
    # exactly the trigger phrase.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Load Tests Java CoGBK Dataflow Streaming'
    runs-on: [self-hosted, ubuntu-20.04, main]
    timeout-minutes: 240
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    # One-element matrix; it exists only to provide the job name and the
    # trigger phrase as reusable values.
    strategy:
      matrix:
        job_name: ["beam_LoadTests_Java_CoGBK_Dataflow_Streaming"]
        job_phrase: ["Run Load Tests Java CoGBK Dataflow Streaming"]
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      # Flatten each config file into one space-separated line (dropping
      # lines that start with '#') and expose it as a step output.
      # "$GITHUB_OUTPUT" is quoted so the redirect target is never word-split.
      - name: Prepare configs
        id: set_configs
        shell: bash
        run: |
          CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt | tr '\n' ' ')
          echo "prepared_config_1=$CURCONFIG" >> "$GITHUB_OUTPUT"
          CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt | tr '\n' ' ')
          echo "prepared_config_2=$CURCONFIG" >> "$GITHUB_OUTPUT"
          CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt | tr '\n' ' ')
          echo "prepared_config_3=$CURCONFIG" >> "$GITHUB_OUTPUT"
          CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt | tr '\n' ' ')
          echo "prepared_config_4=$CURCONFIG" >> "$GITHUB_OUTPUT"
      # NOTE(review): every `arguments` block below deliberately keeps the
      # `|` style and the trailing backslash on its last line. The composite
      # action that consumes it is not visible here and presumably appends
      # more flags after it - confirm before removing either.
      - name: run CoGBK Dataflow Streaming Java Load Test 1 (single key)
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:testing:load-tests:run
          arguments: |
            -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \
            -Prunner=:runners:google-cloud-dataflow-java \
            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_1 }}' \
      - name: run CoGBK Dataflow Streaming Java Load Test 2 (multiple key)
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:testing:load-tests:run
          arguments: |
            -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \
            -Prunner=:runners:google-cloud-dataflow-java \
            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_2 }}' \
      - name: run CoGBK Dataflow Streaming Java Load Test 3 (reiteration 10KB value)
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:testing:load-tests:run
          arguments: |
            -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \
            -Prunner=:runners:google-cloud-dataflow-java \
            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_3 }}' \
      - name: run CoGBK Dataflow Streaming Java Load Test 4 (reiteration 2MB value)
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:testing:load-tests:run
          arguments: |
            -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \
            -Prunner=:runners:google-cloud-dataflow-java \
            '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_4 }}' \
      # Upload the HTML test reports only when an earlier step failed.
      # Uses v4 because v3 of the artifact actions is deprecated and rejected
      # on github.com. The `name` and `path` inputs are unchanged, and this
      # job uploads the artifact once, so v4's unique-name rule is satisfied.
      - name: Archive JUnit Test Results
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: JUnit Test Results
          path: "**/build/reports/tests/"
      # Publish the XML results whether earlier steps passed or failed.
      - name: Publish JUnit Test Results
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
          files: '**/build/test-results/**/*.xml'
Loading

0 comments on commit 2005dff

Please sign in to comment.