Nightly -- Full Network Tests #594

Workflow file for this run

.github/workflows/nightly.yml at 5f45d8c

	# Nightly workflow: runs the full network test suite on a schedule and can
	# also be started by hand.
	name: Nightly -- Full Network Tests

	on:
	  schedule:
	    # Once a day at 00:00 (GitHub evaluates cron schedules in UTC).
	    - cron: "0 0 * * *"
	  # Allows the workflow to be triggered manually.
	  workflow_dispatch:

	# Environment shared by every job in this workflow.
	env:
	  CARGO_INCREMENTAL: "0" # bookkeeping for incremental builds has overhead, not useful in CI.
	  # Base URL used to build the link to the run in failure notifications.
	  WORKFLOW_URL: https://github.com/maidsafe/stableset_net/actions/runs

	jobs:
	  # End-to-end run of the `autonomi` CLI (files and registers) against a
	  # local testnet, on Linux, Windows and macOS runners.
	  e2e:
	    name: E2E tests
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        include:
	          - os: ubuntu-latest
	            safe_path: /home/runner/.local/share/safe
	          - os: windows-latest
	            safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe
	          - os: macos-latest
	            # The space is backslash-escaped because this value is expanded
	            # unquoted into the bash `rm -rf` step further down.
	            safe_path: /Users/runner/Library/Application\ Support/safe
	    steps:
	      - uses: actions/checkout@v4

	      - name: Install Rust
	        uses: dtolnay/rust-toolchain@stable

	      # A cache failure must not fail the job.
	      - uses: Swatinem/rust-cache@v2
	        continue-on-error: true

	      - name: Build binaries
	        run: cargo build --release --features local --bin safenode --bin autonomi
	        timeout-minutes: 30

	      - name: Start a local network
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: start
	          enable-evm-testnet: true
	          node-path: target/release/safenode
	          platform: ${{ matrix.os }}
	          build: true

	      # Fail early if the variables the later steps rely on are missing
	      # (presumably exported by the testnet action above; confirm).
	      - name: Check if SAFE_PEERS and EVM_NETWORK are set
	        shell: bash
	        run: |
	          if [[ -z "$SAFE_PEERS" ]]; then
	            echo "The SAFE_PEERS variable has not been set"
	            exit 1
	          elif [[ -z "$EVM_NETWORK" ]]; then
	            echo "The EVM_NETWORK variable has not been set"
	            exit 1
	          else
	            echo "SAFE_PEERS has been set to $SAFE_PEERS"
	            echo "EVM_NETWORK has been set to $EVM_NETWORK"
	          fi

	      # only these unit tests require a network, the rest are run in the
	      # `full_unit` job of this workflow
	      - name: Run autonomi --tests
	        run: cargo test --package autonomi --tests -- --nocapture
	        env:
	          SN_LOG: "v"
	          # only set the target dir for windows to bypass the linker issue.
	          # happens if we build the node manager via testnet action
	          # NOTE(review): on non-Windows runners the expression evaluates to
	          # '.'; confirm that is the intended target dir.
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 15

	      # FIXME: do this in a generic way for localtestnets
	      - name: export default secret key
	        if: matrix.os != 'windows-latest'
	        run: echo "SECRET_KEY=0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" >> $GITHUB_ENV
	        shell: bash
	      - name: Set secret key for Windows
	        if: matrix.os == 'windows-latest'
	        run: echo "SECRET_KEY=0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
	        shell: pwsh

	      - name: Get file cost
	        run: ./target/release/autonomi --log-output-dest=data-dir file cost "./resources"
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 15

	      # The CLI output is captured so the next step can extract the address.
	      - name: File upload
	        run: ./target/release/autonomi --log-output-dest=data-dir file upload "./resources" > ./upload_output 2>&1
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 15

	      - name: parse address (unix)
	        if: matrix.os != 'windows-latest'
	        run: |
	          UPLOAD_ADDRESS=$(rg "At address: ([0-9a-f]*)" -o -r '$1' ./upload_output)
	          echo "UPLOAD_ADDRESS=$UPLOAD_ADDRESS" >> $GITHUB_ENV
	        shell: bash

	      - name: parse address (win)
	        if: matrix.os == 'windows-latest'
	        run: |
	          $UPLOAD_ADDRESS = rg "At address: ([0-9a-f]*)" -o -r '$1' ./upload_output
	          echo "UPLOAD_ADDRESS=$UPLOAD_ADDRESS" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
	        shell: pwsh

	      - name: File Download
	        run: ./target/release/autonomi --log-output-dest=data-dir file download ${{ env.UPLOAD_ADDRESS }} ./downloaded_resources
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 5

	      - name: Generate register signing key
	        run: ./target/release/autonomi --log-output-dest=data-dir register generate-key

	      - name: Create register (writeable by owner)
	        run: ./target/release/autonomi --log-output-dest=data-dir register create baobao 123 > ./register_create_output 2>&1
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 10

	      - name: parse register address (unix)
	        if: matrix.os != 'windows-latest'
	        run: |
	          REGISTER_ADDRESS=$(rg "Register created at address: ([0-9a-f]*)" -o -r '$1' ./register_create_output)
	          echo "REGISTER_ADDRESS=$REGISTER_ADDRESS" >> $GITHUB_ENV
	        shell: bash

	      - name: parse register address (win)
	        if: matrix.os == 'windows-latest'
	        run: |
	          $REGISTER_ADDRESS = rg "Register created at address: ([0-9a-f]*)" -o -r '$1' ./register_create_output
	          echo "REGISTER_ADDRESS=$REGISTER_ADDRESS" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
	        shell: pwsh

	      - name: Get register
	        run: ./target/release/autonomi --log-output-dest=data-dir register get ${{ env.REGISTER_ADDRESS }}
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 5

	      - name: Edit register
	        run: ./target/release/autonomi --log-output-dest=data-dir register edit ${{ env.REGISTER_ADDRESS }} 456
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 10

	      - name: Get register (after edit)
	        run: ./target/release/autonomi --log-output-dest=data-dir register get ${{ env.REGISTER_ADDRESS }}
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 5

	      - name: Create Public Register (writeable by anyone)
	        run: ./target/release/autonomi --log-output-dest=data-dir register create bao 111 --public > ./register_public_create_output 2>&1
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 5

	      - name: parse public register address (unix)
	        if: matrix.os != 'windows-latest'
	        run: |
	          PUBLIC_REGISTER_ADDRESS=$(rg "Register created at address: ([0-9a-f]*)" -o -r '$1' ./register_public_create_output)
	          echo "PUBLIC_REGISTER_ADDRESS=$PUBLIC_REGISTER_ADDRESS" >> $GITHUB_ENV
	        shell: bash

	      - name: parse public register address (win)
	        if: matrix.os == 'windows-latest'
	        run: |
	          $PUBLIC_REGISTER_ADDRESS = rg "Register created at address: ([0-9a-f]*)" -o -r '$1' ./register_public_create_output
	          echo "PUBLIC_REGISTER_ADDRESS=$PUBLIC_REGISTER_ADDRESS" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
	        shell: pwsh

	      - name: Get Public Register (current key is the owner)
	        run: ./target/release/autonomi --log-output-dest=data-dir register get ${{ env.PUBLIC_REGISTER_ADDRESS }}
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 5

	      - name: Edit Public Register (current key is the owner)
	        run: ./target/release/autonomi --log-output-dest=data-dir register edit ${{ env.PUBLIC_REGISTER_ADDRESS }} 222
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 10

	      # Remove the current key so that the following steps run with a
	      # freshly generated one. safe_path is expanded unquoted on purpose
	      # (see the matrix entries above).
	      - name: Delete current register signing key
	        shell: bash
	        run: rm -rf ${{ matrix.safe_path }}/autonomi

	      - name: Generate new register signing key
	        run: ./target/release/autonomi --log-output-dest=data-dir register generate-key

	      - name: Get Public Register (new signing key is not the owner)
	        run: ./target/release/autonomi --log-output-dest=data-dir register get ${{ env.PUBLIC_REGISTER_ADDRESS }}
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 2

	      - name: Edit Public Register (new signing key is not the owner)
	        run: ./target/release/autonomi --log-output-dest=data-dir register edit ${{ env.PUBLIC_REGISTER_ADDRESS }} 333
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 10

	      - name: Get Public Register (after edit with the new signing key)
	        run: ./target/release/autonomi --log-output-dest=data-dir register get ${{ env.PUBLIC_REGISTER_ADDRESS }}
	        env:
	          SN_LOG: "v"
	        timeout-minutes: 2

	      # Runs even when an earlier step failed.
	      - name: Stop the local network and upload logs
	        if: always()
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: stop
	          log_file_prefix: safe_test_logs_e2e
	          platform: ${{ matrix.os }}

	      - name: post notification to slack on failure
	        if: ${{ failure() }}
	        uses: bryannice/gitactions-slack-notification@2.0.0
	        env:
	          SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
	          SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
	          SLACK_TITLE: "Nightly E2E Test Run Failed"

	  # Per-package unit tests in release mode; these steps do not start a
	  # local network.
	  full_unit:
	    name: Full Unit Tests (including proptests)
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        os: [ubuntu-latest, windows-latest, macos-latest]
	    steps:
	      - uses: actions/checkout@v4

	      - name: Install Rust
	        uses: dtolnay/rust-toolchain@stable

	      - uses: Swatinem/rust-cache@v2
	        continue-on-error: true

	      # Compile first so the per-package timeouts below cover test time only.
	      - name: Build unit tests before running
	        run: cargo test --release --lib --bins --no-run
	        timeout-minutes: 30

	      - name: Run node tests
	        timeout-minutes: 25
	        run: cargo test --release --package sn_node --lib

	      - name: Run network tests
	        timeout-minutes: 25
	        run: cargo test --release --package sn_networking --features="open-metrics"

	      - name: Run protocol tests
	        timeout-minutes: 25
	        run: cargo test --release --package sn_protocol

	      - name: Run transfers tests
	        timeout-minutes: 25
	        run: cargo test --release --package sn_transfers

	      - name: Run logging tests
	        timeout-minutes: 25
	        run: cargo test --release --package sn_logging

	      - name: Run register tests
	        timeout-minutes: 50
	        run: cargo test --release --package sn_registers
	        env:
	          PROPTEST_CASES: "512"

	      - name: post notification to slack on failure
	        if: ${{ failure() }}
	        uses: bryannice/gitactions-slack-notification@2.0.0
	        env:
	          SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
	          SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
	          SLACK_TITLE: "Nightly Unit Test Run Failed"

	  # Runs the `data_with_churn` test against a local testnet, then inspects
	  # the node logs with ripgrep.
	  churn:
	    name: Network churning tests
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        include:
	          - os: ubuntu-latest
	            node_data_path: /home/runner/.local/share/safe/node
	            safe_path: /home/runner/.local/share/safe
	          - os: windows-latest
	            node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node
	            safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe
	          - os: macos-latest
	            # No escaping here: these values are always expanded inside
	            # double quotes in the bash steps of this job.
	            node_data_path: /Users/runner/Library/Application Support/safe/node
	            safe_path: /Users/runner/Library/Application Support/safe
	    steps:
	      - uses: actions/checkout@v4

	      - name: Install Rust
	        uses: dtolnay/rust-toolchain@stable

	      - uses: Swatinem/rust-cache@v2
	        continue-on-error: true

	      - name: Build binaries
	        run: cargo build --release --features local --bin safenode
	        timeout-minutes: 30

	      - name: Build churn tests
	        run: cargo test --release -p sn_node --features=local --test data_with_churn --no-run
	        env:
	          # only set the target dir for windows to bypass the linker issue.
	          # happens if we build the node manager via testnet action
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 30

	      - name: Start a local network
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: start
	          enable-evm-testnet: true
	          node-path: target/release/safenode
	          platform: ${{ matrix.os }}
	          build: true

	      # The step timeout (90) leaves headroom over TEST_DURATION_MINS (60).
	      - name: Chunks data integrity during nodes churn (during 60min) (in theory)
	        run: cargo test --release -p sn_node --features=local --test data_with_churn -- --nocapture
	        env:
	          TEST_DURATION_MINS: "60"
	          TEST_CHURN_CYCLES: "6"
	          SN_LOG: "all"
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 90

	      - name: Stop the local network and upload logs
	        if: always()
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: stop
	          log_file_prefix: safe_test_logs_churn
	          platform: ${{ matrix.os }}

	      - name: Get total node count
	        shell: bash
	        timeout-minutes: 1
	        run: |
	          node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
	          echo "Node dir count is $node_count"

	      - name: Get restart of nodes using rg
	        shell: bash
	        timeout-minutes: 1
	        # get the counts, then the specific line, and then the digit count only
	        # then check we have an expected level of restarts
	        # TODO: make this use an env var, or relate to testnet size
	        run: |
	          restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "Restarted $restart_count nodes"

	      - name: Get peers removed from nodes using rg
	        shell: bash
	        timeout-minutes: 1
	        run: |
	          peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o) || { echo "Failed to extract peer removal count"; exit 1; }
	          if [ -z "$peer_removed" ]; then
	            echo "No peer removal count found"
	            exit 1
	          fi
	          echo "PeerRemovedFromRoutingTable $peer_removed times"

	      - name: Verify peers removed exceed restarted node counts
	        shell: bash
	        timeout-minutes: 1
	        # get the counts, then the specific line, and then the digit count only
	        # then check we have an expected level of restarts
	        # TODO: make this use an env var, or relate to testnet size
	        run: |
	          restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "Restart $restart_count nodes"
	          peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "PeerRemovedFromRoutingTable $peer_removed times"
	          if [ $peer_removed -lt $restart_count ]; then
	            echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count"
	            exit 1
	          fi

	      # TODO: reenable this once the testnet dir creation is tidied up to avoid a large count here
	      # if [ $restart_count -lt $node_count ]; then
	      #   echo "Restart count of: $restart_count is less than the node count of: $node_count"
	      #   exit 1
	      # fi

	      - name: Verify data replication using rg
	        shell: bash
	        timeout-minutes: 1
	        # get the counts, then the specific line, and then the digit count only
	        # then check we have an expected level of replication
	        # TODO: make this use an env var, or relate to testnet size
	        run: |
	          fetching_attempt_count=$(rg "FetchingKeysForReplication" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "Carried out $fetching_attempt_count fetching attempts"
	          node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
	          if [ $fetching_attempt_count -lt $node_count ]; then
	            echo "Replication fetching attempts of: $fetching_attempt_count is less than the node count of: $node_count"
	            exit 1
	          fi

	      # Only error out after uploading the logs
	      # Fails when any log line under <safe_path>/*/*/logs is longer than
	      # 15000 characters.
	      - name: Don't log raw data
	        if: matrix.os != 'windows-latest' # causes error
	        shell: bash
	        timeout-minutes: 10
	        run: |
	          if ! rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }'
	          then
	            echo "We are logging an extremely large data"
	            exit 1
	          fi

	      # Kept as the last step so that a failure of any step above,
	      # including the raw-data check, is reported.
	      - name: post notification to slack on failure
	        if: ${{ failure() }}
	        uses: bryannice/gitactions-slack-notification@2.0.0
	        env:
	          SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
	          SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
	          SLACK_TITLE: "Nightly Churn Test Run Failed"

	  # Runs the `verify_routing_table` and `verify_data_location` tests
	  # against a local testnet, then inspects the node logs with ripgrep.
	  verify_data_location_routing_table:
	    name: Verify data location and Routing Table
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        include:
	          - os: ubuntu-latest
	            node_data_path: /home/runner/.local/share/safe/node
	            safe_path: /home/runner/.local/share/safe
	          - os: windows-latest
	            node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node
	            safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe
	          - os: macos-latest
	            node_data_path: /Users/runner/Library/Application Support/safe/node
	            safe_path: /Users/runner/Library/Application Support/safe
	    steps:
	      - uses: actions/checkout@v4

	      - name: Install Rust
	        uses: dtolnay/rust-toolchain@stable

	      - uses: Swatinem/rust-cache@v2
	        continue-on-error: true

	      - name: Build binaries
	        run: cargo build --release --features local --bin safenode
	        timeout-minutes: 30

	      - name: Build data location and routing table tests
	        run: cargo test --release -p sn_node --features=local --test verify_data_location --test verify_routing_table --no-run
	        env:
	          # only set the target dir for windows to bypass the linker issue.
	          # happens if we build the node manager via testnet action
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 30

	      - name: Start a local network
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: start
	          enable-evm-testnet: true
	          node-path: target/release/safenode
	          platform: ${{ matrix.os }}
	          build: true

	      # First routing-table check, before the data location test.
	      - name: Verify the Routing table of the nodes
	        run: cargo test --release -p sn_node --features=local --test verify_routing_table -- --nocapture
	        env:
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 5

	      - name: Verify the location of the data on the network
	        run: cargo test --release -p sn_node --features=local --test verify_data_location -- --nocapture
	        env:
	          SN_LOG: "all"
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 90

	      # Same routing-table test again, after the data location test.
	      - name: Verify the routing tables of the nodes
	        run: cargo test --release -p sn_node --features=local --test verify_routing_table -- --nocapture
	        env:
	          CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
	        timeout-minutes: 5

	      - name: Stop the local network and upload logs
	        if: always()
	        uses: maidsafe/sn-local-testnet-action@main
	        with:
	          action: stop
	          log_file_prefix: safe_test_logs_data_location
	          platform: ${{ matrix.os }}

	      - name: Verify restart of nodes using rg
	        shell: bash
	        timeout-minutes: 1
	        # get the counts, then the specific line, and then the digit count only
	        # then check we have an expected level of restarts
	        # TODO: make this use an env var, or relate to testnet size
	        run: |
	          restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "Restart $restart_count nodes"
	          peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \
	            rg "(\d+) matches" | rg "\d+" -o)
	          echo "PeerRemovedFromRoutingTable $peer_removed times"
	          if [ $peer_removed -lt $restart_count ]; then
	            echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count"
	            exit 1
	          fi
	          node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
	          echo "Node dir count is $node_count"

	      # Only error out after uploading the logs
	      # Fails when any log line under <safe_path>/*/*/logs is longer than
	      # 15000 characters.
	      - name: Don't log raw data
	        if: matrix.os != 'windows-latest' # causes error
	        shell: bash
	        timeout-minutes: 10
	        run: |
	          if ! rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }'
	          then
	            echo "We are logging an extremely large data"
	            exit 1
	          fi

	      # Kept as the last step so that a failure of any step above,
	      # including the raw-data check, is reported.
	      - name: post notification to slack on failure
	        if: ${{ failure() }}
	        uses: bryannice/gitactions-slack-notification@2.0.0
	        env:
	          SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
	          SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
	          SLACK_TITLE: "Nightly Data Location Test Run Failed"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Nightly -- Full Network Tests #594

Workflow file

Nightly -- Full Network Tests #594

Jobs

Run details

Workflow file for this run