Upgrade KubeRay to 1.2.2 #1938
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: e2e | |
on: | |
pull_request: | |
branches: | |
- main | |
- 'release-*' | |
paths-ignore: | |
- 'docs/**' | |
- '**.adoc' | |
- '**.md' | |
- 'LICENSE' | |
push: | |
branches: | |
- main | |
- 'release-*' | |
paths-ignore: | |
- 'docs/**' | |
- '**.adoc' | |
- '**.md' | |
- 'LICENSE' | |
concurrency: | |
group: ${{ github.head_ref }}-${{ github.workflow }} | |
cancel-in-progress: true | |
jobs: | |
kubernetes-e2e: | |
runs-on: gpu-t4-4-core | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- name: Checkout common repo code | |
uses: actions/checkout@v4 | |
with: | |
repository: 'project-codeflare/codeflare-common' | |
ref: 'main' | |
path: 'common' | |
- name: Set Go | |
uses: actions/setup-go@v5 | |
with: | |
go-version-file: './go.mod' | |
- name: Set up gotestfmt | |
uses: gotesttools/gotestfmt-action@v2 | |
with: | |
token: ${{ secrets.GITHUB_TOKEN }} | |
- name: Setup NVidia GPU environment for KinD | |
uses: ./common/github-actions/nvidia-gpu-setup | |
- name: Setup and start KinD cluster | |
id: kind-install | |
uses: ./common/github-actions/kind | |
- name: Install NVidia GPU operator for KinD | |
uses: ./common/github-actions/nvidia-gpu-operator | |
- name: Deploy CodeFlare stack | |
id: deploy | |
run: | | |
echo Setting up CodeFlare stack | |
make setup-e2e | |
echo Deploying CodeFlare operator | |
IMG=localhost/codeflare-operator:test | |
make image-build -e IMG="${IMG}" | |
podman save -o cfo.tar ${IMG} | |
kind load image-archive cfo.tar --name cluster --verbosity 1000 | |
make deploy -e IMG="${IMG}" -e ENV="e2e" | |
kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager | |
env: | |
# cgroup-manager configuration is required for NVidia image used in e2e PR check | |
IMAGE_BUILD_FLAGS: '--cgroup-manager cgroupfs' | |
- name: Run e2e tests | |
run: | | |
export CODEFLARE_TEST_TIMEOUT_SHORT=3m | |
export CODEFLARE_TEST_TIMEOUT_MEDIUM=15m | |
export CODEFLARE_TEST_TIMEOUT_LONG=20m | |
export CODEFLARE_TEST_TIMEOUT_GPU_PROVISIONING=30m | |
export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }} | |
set -euo pipefail | |
go test -timeout 120m -v -skip "^Test.*Cpu$" ./test/e2e -json 2>&1 | tee ${CODEFLARE_TEST_OUTPUT_DIR}/gotest.log | gotestfmt | |
- name: Print CodeFlare operator logs | |
if: always() && steps.deploy.outcome == 'success' | |
run: | | |
echo "Printing CodeFlare operator logs" | |
kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log | |
- name: Print Kueue operator logs | |
if: always() && steps.deploy.outcome == 'success' | |
run: | | |
echo "Printing Kueue operator logs" | |
KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}') | |
kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log | |
- name: Print KubeRay operator logs | |
if: always() && steps.deploy.outcome == 'success' | |
run: | | |
echo "Printing KubeRay operator logs" | |
kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log | |
- name: Export all KinD pod logs | |
uses: ./common/github-actions/kind-export-logs | |
if: always() && steps.kind-install.outcome == 'success' | |
with: | |
output-directory: ${TEMP_DIR} | |
- name: Upload logs | |
uses: actions/upload-artifact@v4 | |
if: always() && steps.kind-install.outcome == 'success' | |
with: | |
name: logs | |
retention-days: 10 | |
path: | | |
${{ env.TEMP_DIR }}/**/*.log | |
- name: Post notification about failure to a Slack channel in case of push event | |
if: failure() && github.event_name == 'push' | |
uses: slackapi/[email protected] | |
with: | |
channel-id: "codeflare-nightlies" | |
slack-message: "e2e test on push failed, <https://github.com/project-codeflare/codeflare-operator/actions/workflows/e2e_tests.yaml|View workflow runs>" | |
env: | |
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} |