diff --git a/.github/workflows/set_rook_ceph_cluster.yaml b/.github/workflows/set_rook_ceph_cluster.yaml
new file mode 100644
index 00000000..0733c991
--- /dev/null
+++ b/.github/workflows/set_rook_ceph_cluster.yaml
@@ -0,0 +1,54 @@
+# Spins up a single-node minikube cluster on the runner and deploys a Rook Ceph
+# test cluster into it for every pull request targeting main or a release-* branch.
+name: Plugin Go test
+on:
+  pull_request:
+    branches:
+      - main
+      - release-*
+
+defaults:
+  run:
+    # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
+    shell: bash --noprofile --norc -eo pipefail -x {0}
+
+# cancel the in-progress workflow when PR is refreshed.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  default-namespace:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout
+        uses: actions/checkout@v4
+        with:
+          # 0 fetches the complete history instead of a shallow clone
+          fetch-depth: 0
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      # local composite action; it decides on its own whether to open a tmate session
+      - name: consider debugging
+        uses: ./.github/workflows/tmate_debug
+        with:
+          use-tmate: ${{ secrets.USE_TMATE }}
+
+      # the helper script is added under test/ (not tests/), so it is invoked from there
+      - name: Setup Minikube
+        run: |
+          test/github-action-helper.sh install_minikube_with_none_driver v1.28.4
+
+      - name: print k8s cluster status
+        run: |
+          minikube status
+          kubectl get nodes
+
+      - name: use local disk
+        run: test/github-action-helper.sh use_local_disk
+
+      - name: deploy rook cluster
+        run: test/github-action-helper.sh deploy_rook
diff --git a/.github/workflows/tmate_debug/action.yml b/.github/workflows/tmate_debug/action.yml
new file mode 100644
index 00000000..b6a32e5f
--- /dev/null
+++ b/.github/workflows/tmate_debug/action.yml
@@ -0,0 +1,27 @@
+name: "Tmate debugging tests"
+description: "Setup tmate session if the test fails"
+inputs:
+  use-tmate:
+    description: "boolean for enabling TMATE"
+    required: true
+runs:
+  using: "composite"
+  steps:
+    # only evaluated for debug runs or for PRs carrying the 'debug-ci' label
+    - name: consider debugging
+      shell: bash --noprofile --norc -eo pipefail -x {0}
+      if: runner.debug || contains(github.event.pull_request.labels.*.name, 'debug-ci')
+      run: |
+        # Enable tmate when running in the rook org, when the USE_TMATE secret is set in the repo, or when the job is re-run
+        if [ "$GITHUB_REPOSITORY_OWNER" = "rook" ] || [ -n "${{ inputs.use-tmate }}" ] || [ "$GITHUB_RUN_ATTEMPT" -gt 1 ]; then
+          echo USE_TMATE=1 >> "$GITHUB_ENV"
+        fi
+
+    # runs only when the previous step exported USE_TMATE
+    - name: set up tmate session for debugging
+      if: env.USE_TMATE
+      uses: mxschmitt/action-tmate@v3
+      with:
+        # NOTE(review): false presumably lets anyone holding the connection string attach - confirm this is intended
+        limit-access-to-actor: false
+        detached: true
diff --git a/test/collect-logs.sh b/test/collect-logs.sh
new file mode 100755
index 00000000..b6f8984f
--- /dev/null
+++ b/test/collect-logs.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Gathers Ceph status, Kubernetes resource dumps, pod logs and host logs into LOG_DIR
+# so that a CI run can be debugged afterwards.
+
+set -x
+
+# User parameters
+: "${CLUSTER_NAMESPACE:="rook-ceph"}"
+: "${OPERATOR_NAMESPACE:="$CLUSTER_NAMESPACE"}"
+: "${LOG_DIR:="test"}"
+
+LOG_DIR="${LOG_DIR%/}" # remove trailing slash if necessary
+mkdir -p "${LOG_DIR}"
+
+# used unquoted below, so the shell word-splits it into the command and its arguments
+CEPH_CMD="kubectl -n ${CLUSTER_NAMESPACE} exec deploy/rook-ceph-tools -- ceph --connect-timeout 10"
+
+$CEPH_CMD -s >"${LOG_DIR}"/ceph-status.txt
+$CEPH_CMD osd dump >"${LOG_DIR}"/ceph-osd-dump.txt
+$CEPH_CMD report >"${LOG_DIR}"/ceph-report.txt
+
+NAMESPACES=("$CLUSTER_NAMESPACE")
+if [[ "$OPERATOR_NAMESPACE" != "$CLUSTER_NAMESPACE" ]]; then
+  NAMESPACES+=("$OPERATOR_NAMESPACE")
+fi
+
+for NAMESPACE in "${NAMESPACES[@]}"; do
+  # each namespace is a sub-directory for easier debugging
+  NS_DIR="${LOG_DIR}"/namespace-"${NAMESPACE}"
+  mkdir -p "${NS_DIR}" # -p so that re-running against the same LOG_DIR does not error out
+
+  # describe every one of the k8s resources in the namespace which rook commonly uses
+  for KIND in 'pod' 'deployment' 'job' 'daemonset' 'cm'; do
+    kubectl -n "$NAMESPACE" get "$KIND" -o wide >"${NS_DIR}"/"$KIND"-list.txt
+    for resource in $(kubectl -n "$NAMESPACE" get "$KIND" -o jsonpath='{.items[*].metadata.name}'); do
+      kubectl -n "$NAMESPACE" describe "$KIND" "$resource" >"${NS_DIR}"/"$KIND"-describe--"$resource".txt
+
+      # collect logs for pods along the way
+      if [[ "$KIND" == 'pod' ]]; then
+        kubectl -n "$NAMESPACE" logs --all-containers "$resource" >"${NS_DIR}"/logs--"$resource".txt
+      fi
+    done
+  done
+
+  # secrets are dumped with `-o yaml` instead of `describe` because `describe` does not print
+  # the secret values, so they are handled in a separate loop.
+  for secret in $(kubectl -n "$NAMESPACE" get secrets -o jsonpath='{.items[*].metadata.name}'); do
+    kubectl -n "$NAMESPACE" get -o yaml secret "$secret" >"${NS_DIR}"/secret-describe--"$secret".txt
+  done
+
+  # describe every one of the custom resources in the namespace since all should be rook-related and
+  # they aren't captured by 'kubectl get all'
+  for CRD in $(kubectl get crds -o jsonpath='{.items[*].metadata.name}'); do
+    for resource in $(kubectl -n "$NAMESPACE" get "$CRD" -o jsonpath='{.items[*].metadata.name}'); do
+      crd_main_type="${CRD%%.*}" # e.g., for cephclusters.ceph.rook.io, only use 'cephclusters'
+      kubectl -n "$NAMESPACE" get -o yaml "$CRD" "$resource" >"${NS_DIR}"/"$crd_main_type"-describe--"$resource".txt
+    done
+  done
+
+  # do simple 'get all' calls for resources we don't often want to look at
+  kubectl get all -n "$NAMESPACE" -o wide >"${NS_DIR}"/all-wide.txt
+  kubectl get all -n "$NAMESPACE" -o yaml >"${NS_DIR}"/all-yaml.txt
+done
+
+sudo lsblk | sudo tee -a "${LOG_DIR}"/lsblk.txt
+journalctl -o short-precise --dmesg >"${LOG_DIR}"/dmesg.txt
+journalctl >"${LOG_DIR}"/journalctl.txt
diff --git a/test/github-action-helper.sh b/test/github-action-helper.sh
new file mode 100755
index 00000000..7f0e5de6
--- /dev/null
+++ b/test/github-action-helper.sh
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+
+# Helper functions for the CI workflows.
+# Usage: github-action-helper.sh <function> [args...]
+
+set -xeEo pipefail
+
+#############
+# VARIABLES #
+#############
+: "${FUNCTION:=${1}}"
+
+# source https://github.com/rook/rook
+# Prints (on stdout) the first non-loop disk listed by lsblk that does not match the boot device.
+function find_extra_block_dev() {
+  # shellcheck disable=SC2005 # redirect doesn't work with sudo, so use echo
+  echo "$(sudo lsblk)" >/dev/stderr # print lsblk output to stderr for debugging in case of future errors
+  # relevant lsblk --pairs example: (MOUNTPOINT identifies boot partition)(PKNAME is Parent dev ID)
+  # NAME="sda15" SIZE="106M" TYPE="part" MOUNTPOINT="/boot/efi" PKNAME="sda"
+  # NAME="sdb" SIZE="75G" TYPE="disk" MOUNTPOINT="" PKNAME=""
+  # NAME="sdb1" SIZE="75G" TYPE="part" MOUNTPOINT="/mnt" PKNAME="sdb"
+  boot_dev="$(sudo lsblk --noheading --list --output MOUNTPOINT,PKNAME | grep boot | awk '{print $2}')"
+  echo " == find_extra_block_dev(): boot_dev='$boot_dev'" >/dev/stderr # debug in case of future errors
+  # --nodeps ignores partitions
+  # NOTE(review): an empty boot_dev makes `grep -v ""` drop every line - confirm a boot mountpoint always exists
+  extra_dev="$(sudo lsblk --noheading --list --nodeps --output KNAME | grep -v loop | grep -v "$boot_dev" | head -1)"
+  echo " == find_extra_block_dev(): extra_dev='$extra_dev'" >/dev/stderr # debug in case of future errors
+  echo "$extra_dev" # output of function
+}
+
+: "${BLOCK:=$(find_extra_block_dev)}"
+
+# source https://github.com/rook/rook
+# Purges snapd, turns off swap, unmounts /mnt and wipes /dev/${BLOCK}1 (presumably so Ceph can consume the disk).
+use_local_disk() {
+  BLOCK_DATA_PART="/dev/${BLOCK}1"
+  sudo apt purge snapd -y
+  sudo dmsetup version || true
+  sudo swapoff --all --verbose
+  sudo umount /mnt
+  # search for the device since it keeps changing between sda and sdb
+  sudo wipefs --all --force "$BLOCK_DATA_PART"
+}
+
+# Installs Rook from the upstream master example manifests, creates the test cluster on $BLOCK, then adds the toolbox.
+deploy_rook() {
+  kubectl create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/common.yaml
+  kubectl create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/crds.yaml
+  kubectl create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/operator.yaml
+  wait_for_operator_pod_to_be_ready_state
+  curl https://raw.githubusercontent.com/rook/rook/master/deploy/examples/cluster-test.yaml -o cluster-test.yaml
+  sed -i "s|#deviceFilter:|deviceFilter: ${BLOCK/\/dev\//}|g" cluster-test.yaml
+  # only the first "count: 1" in the manifest is changed to 3
+  sed -i '0,/count: 1/ s/count: 1/count: 3/' cluster-test.yaml
+  kubectl create -f cluster-test.yaml
+  wait_for_three_mons
+  wait_for_pod_to_be_ready_state
+  kubectl create -f https://raw.githubusercontent.com/rook/rook/master/deploy/examples/toolbox.yaml
+}
+
+# Polls for up to 200s until exactly one app=rook-ceph-osd pod reports a ready container.
+# NOTE(review): two -o flags are passed to kubectl; the grep relies on the custom-columns output - confirm jsonpath is unneeded
+wait_for_pod_to_be_ready_state() {
+  timeout 200 bash <<-'EOF'
+    until [ $(kubectl get pod -l app=rook-ceph-osd -n rook-ceph -o jsonpath='{.items[*].metadata.name}' -o custom-columns=READY:status.containerStatuses[*].ready | grep -c true) -eq 1 ]; do
+      echo "waiting for the pods to be in ready state"
+      sleep 1
+    done
+EOF
+  timeout_command_exit_code
+}
+
+# Polls for up to 100s until exactly one app=rook-ceph-operator pod reports a ready container.
+wait_for_operator_pod_to_be_ready_state() {
+  timeout 100 bash <<-'EOF'
+    until [ $(kubectl get pod -l app=rook-ceph-operator -n rook-ceph -o jsonpath='{.items[*].metadata.name}' -o custom-columns=READY:status.containerStatuses[*].ready | grep -c true) -eq 1 ]; do
+      echo "waiting for the operator to be in ready state"
+      sleep 1
+    done
+EOF
+  timeout_command_exit_code
+}
+
+# Polls for up to 150s until exactly three non-canary rook-ceph-mon deployments are listed.
+wait_for_three_mons() {
+  timeout 150 bash <<-'EOF'
+    until [ $(kubectl -n rook-ceph get deploy -l app=rook-ceph-mon,mon_canary!=true | grep rook-ceph-mon | wc -l | awk '{print $1}' ) -eq 3 ]; do
+      echo "$(date) waiting for three mon deployments to exist"
+      sleep 2
+    done
+EOF
+  timeout_command_exit_code
+}
+
+# Must be called directly after a `timeout` command: it inspects that command's exit status through $?.
+timeout_command_exit_code() {
+  # timeout command return exit status 124 if command times out
+  if [ $? -eq 124 ]; then
+    echo "Timeout reached"
+    exit 1
+  fi
+}
+
+# Installs minikube, cri-dockerd, crictl and the CNI plugins, then starts minikube with the none driver.
+# $1: Kubernetes version handed to `minikube start --kubernetes-version`.
+install_minikube_with_none_driver() {
+  CRICTL_VERSION="v1.28.0"
+  MINIKUBE_VERSION="v1.31.2"
+
+  sudo apt update
+  sudo apt install -y conntrack socat
+  curl -LO https://storage.googleapis.com/minikube/releases/$MINIKUBE_VERSION/minikube_latest_amd64.deb
+  sudo dpkg -i minikube_latest_amd64.deb
+  rm -f minikube_latest_amd64.deb
+
+  curl -LO https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.4/cri-dockerd_0.3.4.3-0.ubuntu-focal_amd64.deb
+  sudo dpkg -i cri-dockerd_0.3.4.3-0.ubuntu-focal_amd64.deb
+  rm -f cri-dockerd_0.3.4.3-0.ubuntu-focal_amd64.deb
+
+  wget https://github.com/kubernetes-sigs/cri-tools/releases/download/$CRICTL_VERSION/crictl-$CRICTL_VERSION-linux-amd64.tar.gz
+  sudo tar zxvf crictl-$CRICTL_VERSION-linux-amd64.tar.gz -C /usr/local/bin
+  rm -f crictl-$CRICTL_VERSION-linux-amd64.tar.gz
+  sudo sysctl fs.protected_regular=0
+
+  CNI_PLUGIN_VERSION="v1.3.0"
+  CNI_PLUGIN_TAR="cni-plugins-linux-amd64-$CNI_PLUGIN_VERSION.tgz" # change arch if not on amd64
+  CNI_PLUGIN_INSTALL_DIR="/opt/cni/bin"
+
+  curl -LO "https://github.com/containernetworking/plugins/releases/download/$CNI_PLUGIN_VERSION/$CNI_PLUGIN_TAR"
+  sudo mkdir -p "$CNI_PLUGIN_INSTALL_DIR"
+  sudo tar -xf "$CNI_PLUGIN_TAR" -C "$CNI_PLUGIN_INSTALL_DIR"
+  rm "$CNI_PLUGIN_TAR"
+
+  export MINIKUBE_HOME=$HOME CHANGE_MINIKUBE_NONE_USER=true KUBECONFIG=$HOME/.kube/config
+  sudo -E minikube start --kubernetes-version="$1" --driver=none --memory 6g --cpus=2 --addons ingress --cni=calico
+}
+
+########
+# MAIN #
+########
+
+FUNCTION="$1"
+shift # remove function arg now that we've recorded it
+# call the function with the remainder of the user-provided args
+# NOTE(review): running the function inside `if !` makes bash ignore `set -e` for its whole body, so a
+# failing command in the middle of a function does not abort it - confirm this is intended
+if ! $FUNCTION "$@"; then
+  echo "Call to $FUNCTION was not successful" >&2
+  exit 1
+fi