Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,21 @@ jobs:
run: |
kubectl wait --for=condition=available --timeout=300s deployment/rbgs-controller-manager -n rbgs-system

- name: Pre-load e2e test images
run: |
# Pull and load e2e test images to avoid image pull timeout during tests
E2E_IMAGES=(
"registry.cn-hangzhou.aliyuncs.com/acs-sample/nginx:latest"
"registry-cn-hangzhou.ack.aliyuncs.com/dev/patio-runtime:v0.2.0"
)
for image in "${E2E_IMAGES[@]}"; do
echo "Pulling image: ${image}"
docker pull "${image}"
echo "Loading image into Kind: ${image}"
kind load docker-image "${image}" --name rbgs-e2e
done
echo "All e2e test images loaded successfully"

- name: Run E2E tests
run: |
make test-e2e
Expand Down
8 changes: 4 additions & 4 deletions internal/controller/workloads/discovery_config_mode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestEnsureDiscoveryConfigMode(t *testing.T) {

tests := []testCase{
{
name: "missing annotation with legacy role configmap should set legacy mode and requeue",
name: "missing annotation with legacy role configmap should set legacy mode without requeue",
buildExtraObjects: func(rbg *workloadsv1alpha1.RoleBasedGroup) []runtime.Object {
return []runtime.Object{
&corev1.ConfigMap{
Expand All @@ -41,13 +41,13 @@ func TestEnsureDiscoveryConfigMode(t *testing.T) {
},
}
},
wantRequeue: true,
wantRequeue: false,
wantMode: workloadsv1alpha1.LegacyDiscoveryConfigMode,
wantModeAnnotation: true,
},
{
name: "missing annotation without legacy signal should set refine mode and requeue",
wantRequeue: true,
name: "missing annotation without legacy signal should set refine mode without requeue",
wantRequeue: false,
wantMode: workloadsv1alpha1.RefineDiscoveryConfigMode,
wantModeAnnotation: true,
},
Expand Down
19 changes: 18 additions & 1 deletion internal/controller/workloads/pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ func (r *PodReconciler) setRestartCondition(

setCondition(rbg, restartCondition)

rbgApplyConfig := ToRBGApplyConfigurationForStatus(rbg)
// Only patch the conditions, not the RoleStatuses.
// RoleStatuses should be managed by RBG controller only.
rbgApplyConfig := toRBGApplyConfigurationForConditionsOnly(rbg)

return utils.PatchObjectApplyConfiguration(ctx, r.client, rbgApplyConfig, utils.PatchStatus)
}
Expand Down Expand Up @@ -169,6 +171,21 @@ func ToRBGApplyConfigurationForStatus(rbg *workloadsv1alpha1.RoleBasedGroup) *ap
return rbgApplyConfig
}

// toRBGApplyConfigurationForConditionsOnly creates an apply configuration that only updates conditions.
// This is used by pod_controller to avoid overwriting RoleStatuses managed by RBG controller.
func toRBGApplyConfigurationForConditionsOnly(rbg *workloadsv1alpha1.RoleBasedGroup) *applyconfiguration.RoleBasedGroupApplyConfiguration {
if rbg == nil {
return nil
}
gkv := utils.GetRbgGVK()
rbgApplyConfig := applyconfiguration.RoleBasedGroup(rbg.Name, rbg.Namespace).
WithKind(gkv.Kind).
WithAPIVersion(gkv.GroupVersion().String()).
WithStatus(applyconfiguration.RoleBasedGroupStatus().
WithConditions(ToConditionApplyConfigurations(rbg.Status.Conditions)...))
return rbgApplyConfig
}

func ToRoleStatusApplyConfiguration(roleStatus []workloadsv1alpha1.RoleStatus) []*applyconfiguration.RoleStatusApplyConfiguration {
out := make([]*applyconfiguration.RoleStatusApplyConfiguration, 0, len(roleStatus))
for _, rs := range roleStatus {
Expand Down
37 changes: 22 additions & 15 deletions internal/controller/workloads/rolebasedgroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,31 +147,36 @@ func (r *RoleBasedGroupReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, err
}

// Step 3: Construct role statuses
// Step 3: Initialize discovery config mode.
// IMPORTANT: This must be done BEFORE constructAndUpdateRoleStatuses, because
// shouldUseLegacyDiscoveryConfig checks rbg.Status.RoleStatuses to determine if
// this is a new RBG. If roleStatuses is populated first, it will incorrectly
// use legacy mode for new RBGs.
if needRequeue, err := r.ensureDiscoveryConfigMode(ctx, rbg); err != nil || needRequeue {
return ctrl.Result{Requeue: true}, err
}

// Step 4: Reconcile refined discovery ConfigMap.
// This must happen before reconcileRoles to ensure ConfigMap exists before workloads are created.
if err := r.reconcileRefinedDiscoveryConfigMap(ctx, rbg); err != nil {
r.recorder.Event(rbg, corev1.EventTypeWarning, FailedReconcileDiscoveryConfigMap, err.Error())
return ctrl.Result{}, err
}

// Step 5: Construct role statuses
roleStatuses, err := r.constructAndUpdateRoleStatuses(ctx, rbg)
if err != nil {
return ctrl.Result{}, err
}

// Step 4: Calculate coordination strategies for scaling and rolling update
// Step 6: Calculate coordination strategies for scaling and rolling update
// TODO: This is a simple method consolidation for now.
// The coordination logic will be refactored later to improve extensibility and readability.
scalingTargets, rollingUpdateStrategies, err := r.handleCoordinationStrategies(ctx, rbg, roleStatuses)
if err != nil {
return ctrl.Result{}, err
}

// Step 5: Initialize discovery config mode.
if needRequeue, err := r.ensureDiscoveryConfigMode(ctx, rbg); err != nil || needRequeue {
return ctrl.Result{Requeue: true}, err
}

// Step 6: Reconcile refined discovery ConfigMap.
if err := r.reconcileRefinedDiscoveryConfigMap(ctx, rbg); err != nil {
r.recorder.Event(rbg, corev1.EventTypeWarning, FailedReconcileDiscoveryConfigMap, err.Error())
return ctrl.Result{}, err
}

// Step 7: Reconcile roles, do create/update actions for roles.
if err := r.reconcileRoles(ctx, rbg, expectedRolesRevisionHash, scalingTargets, rollingUpdateStrategies); err != nil {
return ctrl.Result{}, err
Expand Down Expand Up @@ -273,7 +278,7 @@ func (r *RoleBasedGroupReconciler) ensureDiscoveryConfigMode(
return false, nil
}

// determine mode, update annotation and then requeue
// determine mode, update annotation
legacy, err := r.shouldUseLegacyDiscoveryConfig(ctx, rbg)
if err != nil {
return true, err
Expand All @@ -291,7 +296,9 @@ func (r *RoleBasedGroupReconciler) ensureDiscoveryConfigMode(
}

log.FromContext(ctx).Info("Initialized discovery config mode", "mode", mode)
return true, nil
// Don't requeue here - continue to reconcile ConfigMap and workloads in the same loop
// to avoid race condition where workload is created before ConfigMap exists
return false, nil
}

func (r *RoleBasedGroupReconciler) shouldUseLegacyDiscoveryConfig(
Expand Down
25 changes: 25 additions & 0 deletions test/e2e/framework/rbg_expect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"

"github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -94,6 +95,9 @@ func (f *Framework) ExpectRbgEqual(rbg *v1alpha1.RoleBasedGroup) {
}

func (f *Framework) ExpectRbgDeleted(rbg *v1alpha1.RoleBasedGroup) {
logger := log.FromContext(f.Ctx).WithValues("rbg", rbg.Name)

// Check RBG is deleted
newRbg := &v1alpha1.RoleBasedGroup{}
gomega.Eventually(
func() bool {
Expand All @@ -108,6 +112,27 @@ func (f *Framework) ExpectRbgDeleted(rbg *v1alpha1.RoleBasedGroup) {

}, utils.Timeout, utils.Interval,
).Should(gomega.BeTrue())

// Check all Pods managed by this RBG are deleted
gomega.Eventually(
func() bool {
podList := &corev1.PodList{}
err := f.Client.List(
f.Ctx, podList,
client.InNamespace(rbg.Namespace),
client.MatchingLabels{v1alpha1.SetNameLabelKey: rbg.Name},
)
if err != nil {
logger.Error(err, "failed to list pods")
return false
}
if len(podList.Items) > 0 {
logger.V(1).Info("waiting for pods to be deleted", "remainingPods", len(podList.Items))
return false
}
return true
}, utils.Timeout, utils.Interval,
).Should(gomega.BeTrue())
}

func (f *Framework) ExpectRbgCondition(
Expand Down
Loading
Loading