Skip to content

Commit 36a5d01

Browse files
feat(e2e): add backward compatibility e2e tests
Signed-off-by: Monika Jakhar <[email protected]>
1 parent a69c888 commit 36a5d01

File tree

4 files changed

+272
-30
lines changed

4 files changed

+272
-30
lines changed

.github/scripts/build-all-images.sh

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,39 @@
11
#!/bin/bash
22
set -e
33

4-
function get_image_tag() {
4+
get_image_tag() {
55
version=$(grep "^VERSION := " ./Makefile)
6-
version=${version#VERSION := }
6+
version="${version#VERSION := }"
77

88
git_sha=$(git rev-parse --short HEAD || echo "HEAD")
9-
export IMAGE_TAG=${version}-${git_sha}
9+
export IMAGE_TAG="${version}-${git_sha}"
1010
}
1111

12-
function build_images() {
12+
build_images() {
1313
images=(
14-
${IMG_REPO}/dataset-controller:${IMAGE_TAG}
15-
${IMG_REPO}/application-controller:${IMAGE_TAG}
16-
${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG}
17-
${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG}
18-
${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG}
19-
${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG}
20-
${IMG_REPO}/thinruntime-controller:${IMAGE_TAG}
21-
${IMG_REPO}/efcruntime-controller:${IMAGE_TAG}
22-
${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG}
23-
${IMG_REPO}/fluid-csi:${IMAGE_TAG}
24-
${IMG_REPO}/fluid-webhook:${IMAGE_TAG}
25-
${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG}
14+
"${IMG_REPO}/dataset-controller:${IMAGE_TAG}"
15+
"${IMG_REPO}/application-controller:${IMAGE_TAG}"
16+
"${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG}"
17+
"${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG}"
18+
"${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG}"
19+
"${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG}"
20+
"${IMG_REPO}/thinruntime-controller:${IMAGE_TAG}"
21+
"${IMG_REPO}/efcruntime-controller:${IMAGE_TAG}"
22+
"${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG}"
23+
"${IMG_REPO}/fluid-csi:${IMAGE_TAG}"
24+
"${IMG_REPO}/fluid-webhook:${IMAGE_TAG}"
25+
"${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG}"
2626
)
2727

2828
make docker-build-all
2929

30-
for img in ${images[@]}; do
31-
echo "Loading image $img to kind cluster..."
32-
kind load docker-image $img --name ${KIND_CLUSTER}
30+
for img in "${images[@]}"; do
31+
echo "Loading image ${img} to kind cluster..."
32+
kind load docker-image "${img}" --name "${KIND_CLUSTER}"
3333
done
3434
}
3535

36-
function cleanup_docker_caches() {
36+
cleanup_docker_caches() {
3737
echo ">>> System disk usage after building fluid images"
3838
df -h
3939
echo ">>> Cleaning docker caches..."

.github/scripts/deploy-fluid-to-kind.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,26 @@
11
#!/bin/bash
22
set -e
33

4-
function get_image_tag() {
4+
get_image_tag() {
5+
local version=""
56
version=$(grep "^VERSION := " ./Makefile)
6-
version=${version#VERSION := }
7+
version="${version#VERSION := }"
78

9+
local git_sha=""
810
git_sha=$(git rev-parse --short HEAD || echo "HEAD")
9-
export IMAGE_TAG=${version}-${git_sha}
11+
export IMAGE_TAG="${version}-${git_sha}"
1012
}
1113

12-
function deploy_fluid() {
13-
echo "Replacing image tags in values.yaml with $IMAGE_TAG"
14-
sed -i -E "s/version: &defaultVersion v[0-9]\.[0-9]\.[0-9]-[a-z0-9]+$/version: \&defaultVersion $IMAGE_TAG/g" charts/fluid/fluid/values.yaml
15-
kubectl create ns fluid-system
16-
helm install --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid
14+
deploy_fluid() {
15+
echo "Replacing image tags in values.yaml with ${IMAGE_TAG}"
16+
sed -i -E "s/version: &defaultVersion .+$/version: \&defaultVersion ${IMAGE_TAG}/g" charts/fluid/fluid/values.yaml
17+
kubectl create ns fluid-system || true
18+
helm upgrade --install --namespace fluid-system --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid
1719
}
1820

19-
function main() {
21+
main() {
2022
get_image_tag
21-
if [[ -z "$IMAGE_TAG" ]];then
23+
if [[ -z "${IMAGE_TAG}" ]]; then
2224
echo "Failed to get image tag, exiting..."
2325
exit 1
2426
fi
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/bin/bash
2+
3+
# Write a log line to stdout, prefixed with ">>> ".
# Arguments: $1 - message text
syslog() {
  local message="${1}"
  printf '%s\n' ">>> ${message}"
}
6+
7+
# Report a test failure through syslog and terminate the script.
# Arguments: $1 - description of what went wrong
# Exits the shell with status 1; never returns to the caller.
panic() {
  local reason="${1}"
  syslog "backward compatibility test failed: ${reason}"
  exit 1
}
12+
13+
# Print the image tags used by pods in the fluid-system namespace, then poll
# until every pod whose listing line is not "Completed" shows "Running".
# Globals:   none
# Arguments: none
# Outputs:   image tags and periodic status reports on stdout
# Returns:   0 once all pods are running; calls panic (exit 1) on timeout
check_control_plane_status() {
  echo "=== Unique image tags used by Fluid control plane ==="
  kubectl get pod -n fluid-system -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{"\n"}{end}{range .spec.initContainers[*]}{.image}{"\n"}{end}{end}' \
    | sed 's/.*://' \
    | sort -u

  # 360 polls with a 5 s sleep between them = 30 minutes.
  local timeout=360
  local counter=0
  local status_interval=36
  local pod_list=""
  local total_pods=0
  local running_pods=0
  local not_running_pods=0

  while true; do
    # One snapshot per poll so both counts describe the same moment. A failed
    # kubectl call is treated as "no pods yet" and retried on the next poll.
    pod_list=$(kubectl get pod -n fluid-system --no-headers) || pod_list=""

    # awk always exits 0. `grep -c` exits 1 when the count is zero, which
    # aborts the whole script when the caller has `set -e` enabled.
    total_pods=$(awk 'NF && !/Completed/ { n++ } END { print n + 0 }' <<<"${pod_list}")
    running_pods=$(awk '/Running/ { n++ } END { print n + 0 }' <<<"${pod_list}")
    not_running_pods=$((total_pods - running_pods))

    if ((counter % status_interval == 0)); then
      syslog "[Status Check $((counter / status_interval))] Pod status: ${running_pods}/${total_pods} running (${not_running_pods} not ready)"
      if ((not_running_pods > 0)); then
        echo "=== Not running pods ==="
        kubectl get pods -n fluid-system \
          --field-selector='status.phase!=Running' \
          -o=custom-columns='NAME:.metadata.name,STATUS:.status.phase,REASON:.status.reason'
      fi
    fi

    if ((total_pods != 0 && total_pods == running_pods)); then
      break
    fi

    if ((counter >= timeout)); then
      panic "Timeout waiting for control plane after ${counter} checks!"
    fi

    sleep 5
    # Not ((counter++)): that returns status 1 when counter is 0 and would
    # trip `set -e` on the very first retry.
    counter=$((counter + 1))
  done
  syslog "Fluid control plane is ready after ${counter} checks!"
}
53+
54+
# Poll a dataset every 5 s until its status.phase is "Bound".
# Globals:   none
# Arguments: $1 - dataset name
# Outputs:   a progress line every third poll (15 s) and a final success line
# Returns:   0 when the dataset is Bound; calls panic (exit 1) after 180 s
wait_dataset_bound() {
  local dataset_name="${1}"
  local deadline=180
  local log_interval=0
  local log_times=0
  local last_state=""
  while true; do
    # `|| echo ""` turns a failed lookup into an empty phase so that a
    # transient kubectl error does not abort the script under `set -e`.
    last_state=$(kubectl get dataset "${dataset_name}" -ojsonpath='{.status.phase}' 2>/dev/null || echo "")

    # Test for success before the deadline so a dataset that becomes Bound on
    # the final poll is not reported as a timeout.
    if [[ "${last_state}" == "Bound" ]]; then
      break
    fi

    if ((log_interval == 3)); then
      # Plain assignments instead of ((var++)): the latter returns status 1
      # when the old value is 0, which kills the script under `set -e`.
      log_times=$((log_times + 1))
      syslog "checking dataset.status.phase==Bound (already $((log_times * 3 * 5))s, last state: ${last_state:-None})"
      if ((log_times * 3 * 5 >= deadline)); then
        panic "timeout for ${deadline}s waiting for dataset bound!"
      fi
      log_interval=0
    fi

    log_interval=$((log_interval + 1))
    sleep 5
  done
  syslog "Found dataset ${dataset_name} status.phase==Bound"
}
80+
81+
wait_job_completed() {
82+
local job_name="${1}"
83+
local deadline=600 # 10 minutes
84+
local counter=0
85+
while true; do
86+
# Handle missing fields gracefully
87+
succeed=$(kubectl get job "${job_name}" -ojsonpath='{.status.succeeded}' 2>/dev/null || echo "0")
88+
failed=$(kubectl get job "${job_name}" -ojsonpath='{.status.failed}' 2>/dev/null || echo "0")
89+
90+
# Ensure variables are treated as integers
91+
[[ -z "${succeed}" ]] && succeed=0
92+
[[ -z "${failed}" ]] && failed=0
93+
94+
if [[ "${failed}" -gt 0 ]]; then
95+
panic "job ${job_name} failed when accessing data"
96+
fi
97+
if [[ "${succeed}" -gt 0 ]]; then
98+
break
99+
fi
100+
101+
((counter++))
102+
if [[ $((counter * 5)) -ge "${deadline}" ]]; then
103+
panic "timeout for ${deadline}s waiting for job ${job_name} completion!"
104+
fi
105+
sleep 5
106+
done
107+
syslog "Found succeeded job ${job_name}"
108+
}
109+
110+
# Install Fluid from the fluid/fluid chart of the public chart repository into
# the fluid-system namespace, then wait for its control plane to come up.
# Arguments: none
setup_old_fluid() {
  local namespace="fluid-system"
  local repo_name="fluid"

  syslog "Setting up older version of Fluid from charts"
  helm repo add "${repo_name}" https://fluid-cloudnative.github.io/charts
  helm repo update "${repo_name}"

  # Creation fails when the namespace already exists; that is acceptable here.
  kubectl create ns "${namespace}" || true

  helm install fluid "${repo_name}/fluid" --namespace "${namespace}" --wait
  check_control_plane_status
}
121+
122+
# Apply the alluxio dataset manifest and block until the "zookeeper" dataset
# reports Bound.
# Arguments: none
create_dataset() {
  local manifest="test/gha-e2e/alluxio/dataset.yaml"
  local dataset="zookeeper"

  syslog "Creating alluxio dataset..."
  kubectl apply -f "${manifest}"
  # Pause 10 s so the API server and controller can settle before polling.
  sleep 10
  wait_dataset_bound "${dataset}"
}
129+
130+
# Run the deploy script to install the locally built Fluid over the existing
# release, then wait for the control plane to become ready again.
# Arguments: none
upgrade_fluid() {
  local deploy_script="./.github/scripts/deploy-fluid-to-kind.sh"

  syslog "Upgrading Fluid to the locally built current version..."
  "${deploy_script}"
  check_control_plane_status
}
135+
136+
# Check that the previously created dataset is still Bound, run the data
# access job against it, and delete the test manifests afterwards.
# Arguments: none
verify_backward_compatibility() {
  local manifest_dir="test/gha-e2e/alluxio"

  syslog "Verifying backward compatibility..."

  # The dataset created earlier must still be Bound.
  wait_dataset_bound "zookeeper"

  # Launch the job that reads data through the runtime and wait for it.
  kubectl apply -f "${manifest_dir}/job.yaml"
  wait_job_completed "fluid-test"

  # Remove everything defined under the manifest directory.
  kubectl delete -f "${manifest_dir}/"
}
148+
149+
# Entry point: enable exit-on-error, then run the four test phases in order,
# logging a timestamped banner before the first and after the last.
# Arguments: none
main() {
  set -e
  syslog "[BACKWARD COMPATIBILITY TEST STARTS AT $(date)]"

  local phase
  for phase in \
    setup_old_fluid \
    create_dataset \
    upgrade_fluid \
    verify_backward_compatibility; do
    "${phase}"
  done

  syslog "[BACKWARD COMPATIBILITY TEST SUCCEEDED AT $(date)]"
}
160+
161+
main
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: E2E Backward Compatibility Check
2+
on:
3+
pull_request:
4+
branches: [master, release-*]
5+
paths-ignore:
6+
- "docs/**"
7+
- "addons/**"
8+
- "sdk/**"
9+
- "static/**"
10+
11+
permissions:
12+
contents: read
13+
actions: read
14+
15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.ref }}
17+
cancel-in-progress: true
18+
19+
env:
20+
GO_VERSION: 1.24.12
21+
22+
jobs:
23+
backward-compat-test:
24+
runs-on: ubuntu-latest
25+
strategy:
26+
fail-fast: false
27+
matrix:
28+
kubernetes-version:
29+
["v1.33.2", "v1.30.13", "v1.28.15", "v1.24.17", "v1.22.17"]
30+
env:
31+
GOPATH: ${{ github.workspace }}
32+
GO111MODULE: auto
33+
KIND_CLUSTER: fluid-cluster
34+
defaults:
35+
run:
36+
working-directory: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid
37+
38+
steps:
39+
- name: Set up Go
40+
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
41+
with:
42+
go-version: ${{ env.GO_VERSION }}
43+
44+
- name: Set up Helm
45+
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
46+
47+
- name: Checkout code
48+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
49+
with:
50+
path: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid
51+
52+
- name: Create k8s Kind Cluster
53+
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # v1.13.0
54+
with:
55+
version: v0.29.0
56+
node_image: kindest/node:${{ matrix.kubernetes-version }}
57+
cluster_name: ${{ env.KIND_CLUSTER }}
58+
kubectl_version: ${{ matrix.kubernetes-version }}
59+
60+
- name: Build current fluid docker images
61+
env:
62+
IMG_REPO: fluidcloudnative
63+
run: |
64+
echo ">>> System disk usage before build fluid images"
65+
df -h
66+
./.github/scripts/build-all-images.sh
67+
68+
- name: Run backward compatibility e2e tests
69+
timeout-minutes: 40
70+
run: |
71+
bash ./.github/scripts/gha-backward-compatibility.sh
72+
73+
- name: Dump environment
74+
if: ${{ !cancelled() }}
75+
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
76+
with:
77+
name: gha-backward-compat-logs-${{ github.job }}-${{ matrix.kubernetes-version }}
78+
path: "src/github.com/fluid-cloudnative/fluid/e2e-tmp/testcase-*.tgz"
79+
retention-days: 14

0 commit comments

Comments
 (0)