Skip to content

Commit 9c6e73c

Browse files
committed
feat: private networking
1 parent 816178f commit 9c6e73c

49 files changed

Lines changed: 2567 additions & 452 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/e2e-weekly.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@ jobs:
3737
- name: Run weekly e2e tests
3838
env:
3939
CLOUDSCALE_API_TOKEN: ${{ secrets.CLOUDSCALE_API_TOKEN }}
40+
CLOUDSCALE_NETWORK_UUID: ${{ secrets.CLOUDSCALE_NETWORK_UUID }}
4041
TAG: e2e-weekly-${{ github.sha }}
4142
run: |
42-
make test-e2e \
43-
GINKGO_LABEL_FILTER="ha || upgrade || self-hosted || kcp-remediation || conformance" \
44-
KUBETEST_CONFIGURATION=./data/kubetest/conformance-fast.yaml
43+
make test-e2e \
44+
GINKGO_LABEL_FILTER="ha || upgrade || self-hosted || kcp-remediation || conformance || byo-networking" \
45+
KUBETEST_CONFIGURATION=./data/kubetest/conformance-fast.yaml
4546
4647
- name: Install regctl
4748
if: always()

.github/workflows/test-e2e.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ on:
1818
- test-e2e-upgrade
1919
- test-e2e-self-hosted
2020
- test-e2e-md-remediation
21+
- test-e2e-byo-networking
2122
- test-e2e-conformance
2223
- test-e2e-conformance-fast
2324

@@ -53,6 +54,7 @@ jobs:
5354
- name: Run e2e tests
5455
env:
5556
CLOUDSCALE_API_TOKEN: ${{ secrets.CLOUDSCALE_API_TOKEN }}
57+
CLOUDSCALE_NETWORK_UUID: ${{ secrets.CLOUDSCALE_NETWORK_UUID }}
5658
TAG: e2e-manual-${{ github.sha }}
5759
TEST_TARGET: ${{ github.event.inputs.test_target }}
5860
run: make $TEST_TARGET

Makefile

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,17 +125,20 @@ generate-e2e-cni: ## Regenerate Cilium CNI manifest from Helm chart
125125
generate-e2e-ccm: ## Regenerate cloudscale CCM manifest
126126
@CCM_VERSION=$(CCM_VERSION) hack/generate-e2e-ccm.sh
127127

128+
# Names of the kustomize overlay directories under $(E2E_TEMPLATES).
# generate-e2e-templates renders each entry into $(E2E_TEMPLATES)/main/<name>.yaml,
# so adding a new e2e flavor only requires appending its overlay name here.
E2E_CLUSTER_TEMPLATES := cluster-template \
129+
cluster-template-ha \
130+
cluster-template-upgrades \
131+
cluster-template-md-remediation \
132+
cluster-template-byo-network \
133+
cluster-template-public-lb-private-nodes \
134+
cluster-template-fip
135+
128136
# Render every overlay listed in E2E_CLUSTER_TEMPLATES into $(E2E_TEMPLATES)/main.
# The $(foreach ...) below expands to ONE shell command of the form
#   echo ... && kustomize build ... > a.yaml && echo ... && kustomize build ... > b.yaml && true
# so the first failing kustomize build short-circuits the chain and fails the recipe;
# the trailing `true` only terminates the last dangling `&&`.
# NOTE(review): output is written via `>` redirection, so a failed build leaves an
# empty or partial <name>.yaml behind in $(E2E_TEMPLATES)/main.
.PHONY: generate-e2e-templates
129137
generate-e2e-templates: $(KUSTOMIZE) generate-e2e-cni generate-e2e-ccm ## Generate e2e cluster templates using kustomize overlays
130138
@mkdir -p $(E2E_TEMPLATES)/main
131-
@echo "Generating cluster-template.yaml..."
132-
@"$(KUSTOMIZE)" build --load-restrictor LoadRestrictionsNone $(E2E_TEMPLATES)/cluster-template > $(E2E_TEMPLATES)/main/cluster-template.yaml
133-
@echo "Generating cluster-template-ha.yaml..."
134-
@"$(KUSTOMIZE)" build --load-restrictor LoadRestrictionsNone $(E2E_TEMPLATES)/cluster-template-ha > $(E2E_TEMPLATES)/main/cluster-template-ha.yaml
135-
@echo "Generating cluster-template-upgrades.yaml..."
136-
@"$(KUSTOMIZE)" build --load-restrictor LoadRestrictionsNone $(E2E_TEMPLATES)/cluster-template-upgrades > $(E2E_TEMPLATES)/main/cluster-template-upgrades.yaml
137-
@echo "Generating cluster-template-md-remediation.yaml..."
138-
@"$(KUSTOMIZE)" build --load-restrictor LoadRestrictionsNone $(E2E_TEMPLATES)/cluster-template-md-remediation > $(E2E_TEMPLATES)/main/cluster-template-md-remediation.yaml
139+
@$(foreach tmpl,$(E2E_CLUSTER_TEMPLATES),\
140+
echo "Generating $(tmpl).yaml..." && \
141+
"$(KUSTOMIZE)" build --load-restrictor LoadRestrictionsNone $(E2E_TEMPLATES)/$(tmpl) > $(E2E_TEMPLATES)/main/$(tmpl).yaml &&) true
139142
@echo "Templates generated successfully."
140143

141144
.PHONY: generate-e2e-config
@@ -222,6 +225,19 @@ test-e2e-md-remediation: $(GINKGO) generate-e2e-templates generate-e2e-config do
222225
-e2e.skip-resource-cleanup=$(SKIP_RESOURCE_CLEANUP) \
223226
-e2e.use-existing-cluster=$(USE_EXISTING_CLUSTER)
224227

228+
# Runs only the Ginkgo specs selected by --label-filter="byo-networking", with a
# 90-minute suite timeout. Templates, e2e config and the controller image are
# (re)built first via the prerequisites. The JUnit report is named
# junit.e2e_byo_networking.xml and uses $(E2E_ARTIFACTS_FOLDER) as --output-dir.
.PHONY: test-e2e-byo-networking
229+
test-e2e-byo-networking: $(GINKGO) generate-e2e-templates generate-e2e-config docker-build ## Run BYO networking e2e tests
230+
$(GINKGO) -v --trace --tags=e2e \
231+
--nodes=$(GINKGO_NODES) \
232+
--label-filter="byo-networking" \
233+
--timeout=90m \
234+
--output-dir="$(E2E_ARTIFACTS_FOLDER)" --junit-report="junit.e2e_byo_networking.xml" \
235+
./test/e2e -- \
236+
-e2e.config=$(E2E_CONF_FILE) \
237+
-e2e.artifacts-folder=$(E2E_ARTIFACTS_FOLDER) \
238+
-e2e.skip-resource-cleanup=$(SKIP_RESOURCE_CLEANUP) \
239+
-e2e.use-existing-cluster=$(USE_EXISTING_CLUSTER)
240+
225241
.PHONY: test-e2e-conformance
226242
test-e2e-conformance: $(GINKGO) generate-e2e-templates generate-e2e-config docker-build ## Run K8s conformance e2e tests
227243
$(GINKGO) -v --trace --tags=e2e \

README.md

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ for [cloudscale.ch](https://www.cloudscale.ch).
88

99
## Features
1010

11-
- **CloudscaleCluster**: Network, Subnet, Load Balancer management
12-
- **CloudscaleMachine**: Server provisioning with cloud-init
11+
- **CloudscaleCluster**: Multi-network management (managed or BYO), Load Balancer (public or private VIP), Floating IP
12+
support
13+
- **CloudscaleMachine**: Server provisioning with cloud-init and configurable network interfaces
1314
- **CloudscaleMachineTemplate**: Immutable machine templates for KubeadmControlPlane/MachineDeployment
1415

1516
## Prerequisites
@@ -42,6 +43,9 @@ clusterctl generate cluster my-cluster \
4243
| kubectl apply -f -
4344
```
4445

46+
This uses the default template (public nodes, managed network). See [Cluster Templates](#cluster-templates) for other
47+
network topologies.
48+
4549
Watch the cluster come up:
4650

4751
```bash
@@ -50,15 +54,40 @@ clusterctl describe cluster my-cluster
5054

5155
## Environment Variables
5256

53-
| Variable | Description | Example |
54-
|-------------------------------------------|--------------------------------|-----------------------------------|
55-
| `CLOUDSCALE_API_TOKEN` | cloudscale.ch API token | `abc123...` |
56-
| `CLOUDSCALE_SSH_PUBLIC_KEY` | SSH public key added to nodes | `ssh-ed25519 AAAA...` |
57-
| `CLOUDSCALE_REGION` | cloudscale.ch region | `lpg` or `rma` |
58-
| `CLOUDSCALE_MACHINE_IMAGE` | Server image for nodes | `custom:ubuntu-2404-kube-v1.xx.x` |
59-
| `CLOUDSCALE_CONTROL_PLANE_MACHINE_FLAVOR` | Flavor for control plane nodes | `flex-4-2` |
60-
| `CLOUDSCALE_WORKER_MACHINE_FLAVOR` | Flavor for worker nodes | `flex-4-2` |
61-
| `CLOUDSCALE_ROOT_VOLUME_SIZE` | Root volume size in GB | `50` |
57+
| Variable | Description | Example |
58+
|-------------------------------------------|-------------------------------------------|-----------------------------------|
59+
| `CLOUDSCALE_API_TOKEN` | cloudscale.ch API token | `abc123...` |
60+
| `CLOUDSCALE_SSH_PUBLIC_KEY` | SSH public key added to nodes | `ssh-ed25519 AAAA...` |
61+
| `CLOUDSCALE_REGION` | cloudscale.ch region | `lpg` or `rma` |
62+
| `CLOUDSCALE_MACHINE_IMAGE` | Server image for nodes | `custom:ubuntu-2404-kube-v1.xx.x` |
63+
| `CLOUDSCALE_CONTROL_PLANE_MACHINE_FLAVOR` | Flavor for control plane nodes | `flex-4-2` |
64+
| `CLOUDSCALE_WORKER_MACHINE_FLAVOR` | Flavor for worker nodes | `flex-4-2` |
65+
| `CLOUDSCALE_ROOT_VOLUME_SIZE` | Root volume size in GB | `50` |
66+
| `CLOUDSCALE_NETWORK_UUID` | Existing cloudscale.ch network UUID (BYO) | `2db69ba3-...` |
67+
68+
> **Note:** `CLOUDSCALE_NETWORK_UUID` is required by the `fip`, `public-lb-private-nodes`, and `byo-network`
69+
> template flavors. It is not needed for the default template.
70+
71+
## Cluster Templates
72+
73+
CAPCS ships several cluster templates for different network topologies. Use `clusterctl generate cluster` with the
74+
`--flavor` flag to select one:
75+
76+
```bash
77+
clusterctl generate cluster my-cluster \
78+
--kubernetes-version v1.32.0 \
79+
--control-plane-machine-count 1 \
80+
--worker-machine-count 2 \
81+
--flavor <flavor-name> \
82+
| kubectl apply -f -
83+
```
84+
85+
| Flavor | Network | CP Endpoint | Node Connectivity | Extra Env Vars | Notes |
86+
|---------------------------|---------------------------|--------------------------|-------------------|---------------------------|----------------------|
87+
| *(default)*               | Managed (`10.100.0.0/24`) | Public LB (DualStack)    | Public + cluster  |                           |                      |
88+
| `fip` | BYO | Floating IP (IPv4) | Public + cluster | `CLOUDSCALE_NETWORK_UUID` | No load balancer |
89+
| `public-lb-private-nodes` | BYO + NAT | Public LB | Private only | `CLOUDSCALE_NETWORK_UUID` | Requires NAT gateway |
90+
| `byo-network` | BYO | Public LB (DualStack) | Public + cluster | `CLOUDSCALE_NETWORK_UUID` | |
6291

6392
## Development
6493

@@ -92,14 +121,16 @@ filtering and are split into suites of increasing cost, scheduled accordingly:
92121
| Cluster upgrade | `upgrade` | Rolling K8s version upgrade (v1.34 → v1.35) | < 10 min | Weekly | `test-e2e-upgrade` |
93122
| Self-hosted | `self-hosted` | clusterctl move (pivot) to workload cluster. Requires container image in public registry | < 15 min | Weekly | `test-e2e-self-hosted` |
94123
| MD remediation | `md-remediation` | MachineHealthCheck auto-replacement of unhealthy workers | < 10 min | Weekly | `test-e2e-md-remediation` |
124+
| BYO networking | `byo-networking` | BYO network: public-LB + private-nodes and floating-IP variants | < 10 min | Weekly | `test-e2e-byo-networking` |
95125
| Conformance (fast) | `conformance` | K8s conformance, skip Serial tests | < 60 min | Weekly | `test-e2e-conformance-fast` |
96126
| Conformance (full) | `conformance` | Full K8s conformance including Serial tests | < 120 min | Biweekly | `test-e2e-conformance` |
97127

98128
Durations are approximate from a real CI run; conformance varies with cluster size.
99129

100130
**Why this split?** The single-CP lifecycle test is the cheapest smoke test and runs
101131
nightly to catch regressions early. HA, upgrade, self-hosted, and remediation tests are more
102-
resource-intensive and run weekly. Full K8s conformance is the most expensive and runs biweekly
132+
resource-intensive and run weekly. BYO networking tests (`byo-networking`) require `CLOUDSCALE_NETWORK_UUID` to be set and are
133+
skipped otherwise. Full K8s conformance is the most expensive and runs biweekly
103134
(1st + 15th of month). All suites can be triggered manually via the `test-e2e.yml` workflow
104135
dispatch. E2E tests share a concurrency group so only one suite runs at a time.
105136

@@ -143,6 +174,8 @@ kustomize_substitutions:
143174
CLOUDSCALE_WORKER_MACHINE_FLAVOR: "flex-4-2"
144175
CLOUDSCALE_MACHINE_IMAGE: "IMAGE_NAME"
145176
CLOUDSCALE_ROOT_VOLUME_SIZE: "50"
177+
# Required for BYO network flavors (fip, public-lb-private-nodes, byo-network):
178+
# CLOUDSCALE_NETWORK_UUID: "UUID_HERE"
146179
extra_args:
147180
cloudscale:
148181
- "--zap-log-level=5"

api/v1beta2/cloudscalecluster_types.go

Lines changed: 111 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ const (
2828
ClusterFinalizer = "cloudscalecluster.infrastructure.cluster.x-k8s.io"
2929
)
3030

31+
// IPFamily represents an IP family configuration.
32+
// +kubebuilder:validation:Enum=IPv4;IPv6;DualStack
33+
type IPFamily string
34+
35+
// Allowed IPFamily values. The string literals must stay in sync with the
// kubebuilder Enum marker on the IPFamily type.
// FloatingIPSpec.IPFamily narrows the set further to IPv4 and IPv6 only.
const (
36+
IPFamilyIPv4 IPFamily = "IPv4"
37+
IPFamilyIPv6 IPFamily = "IPv6"
38+
IPFamilyDualStack IPFamily = "DualStack"
39+
)
40+
3141
// CloudscaleClusterSpec defines the desired state of CloudscaleCluster
3242
type CloudscaleClusterSpec struct {
3343
// Region is the cloudscale.ch region (e.g., "rma", "lpg").
@@ -45,17 +55,30 @@ type CloudscaleClusterSpec struct {
4555
CredentialsRef CloudscaleCredentialsReference `json:"credentialsRef"`
4656

4757
// ControlPlaneEndpoint represents the endpoint to communicate with the control plane.
48-
// This is set automatically from the load balancer's VIP address.
58+
// This is set automatically from the load balancer's VIP address or floating IP.
4959
// +optional
5060
ControlPlaneEndpoint clusterv1.APIEndpoint `json:"controlPlaneEndpoint,omitzero"`
5161

52-
// Network contains network configuration for the cluster.
62+
// Networks define the private networks for this cluster.
63+
// Referenced by name from machine interface specs and LB config.
64+
// If empty, defaults to a single managed network named after the cluster.
65+
// +listType=map
66+
// +listMapKey=name
5367
// +optional
54-
Network NetworkSpec `json:"network,omitzero"`
68+
Networks []NetworkSpec `json:"networks,omitempty"`
5569

5670
// ControlPlaneLoadBalancer configures the load balancer for the control plane.
5771
// +optional
5872
ControlPlaneLoadBalancer LoadBalancerSpec `json:"controlPlaneLoadBalancer,omitzero"`
73+
74+
// FloatingIP configures a floating IP for a stable control plane endpoint.
75+
// When the load balancer is enabled (recommended), the floating IP is assigned
76+
// to the LB, providing a stable IP that survives LB recreation.
77+
// When using a BYO floating IP without a load balancer, the user must
78+
// configure a dummy interface on the control plane servers (see cloudscale.ch docs).
79+
// Managed floating IPs require the load balancer to be enabled.
80+
// +optional
81+
FloatingIP *FloatingIPSpec `json:"floatingIP,omitempty"`
5982
}
6083

6184
// CloudscaleCredentialsReference references a Secret containing the API token.
@@ -69,28 +92,43 @@ type CloudscaleCredentialsReference struct {
6992
Namespace string `json:"namespace,omitempty"`
7093
}
7194

72-
// NetworkSpec defines the network configuration.
95+
// NetworkSpec defines a private network for the cluster.
96+
// Exactly one of UUID or CIDR must be specified.
7397
type NetworkSpec struct {
74-
// CIDR is the CIDR block for the private network subnet.
75-
// +kubebuilder:default="10.0.0.0/24"
98+
// Name identifies this network within the cluster.
99+
// Used to reference this network from machine interface specs and LB config.
100+
// +kubebuilder:validation:Required
101+
// +kubebuilder:validation:Pattern=`^[a-z0-9]([a-z0-9-]*[a-z0-9])?$`
102+
// +kubebuilder:validation:MaxLength=63
103+
Name string `json:"name"`
104+
105+
// UUID references an existing cloudscale.ch network (BYO).
106+
// The network is not deleted on cluster teardown.
107+
// Mutually exclusive with CIDR.
108+
// +optional
109+
UUID string `json:"uuid,omitempty"`
110+
111+
// CIDR defines the subnet for a controller-managed network.
112+
// The network and subnet are created and deleted by CAPCS.
113+
// Mutually exclusive with UUID.
76114
// +optional
77115
CIDR string `json:"cidr,omitempty"`
78116

79117
// GatewayAddress is the gateway IP address for the subnet.
80-
// By default, no gateway is configured on the private network subnet. This ensures
81-
// that outbound internet traffic uses the public network interface, which is required
82-
// for the Cloud Controller Manager to reach the cloudscale.ch API.
118+
// Only applicable when CIDR is set (managed network).
119+
// By default, no gateway is configured on the subnet. This ensures
120+
// that outbound internet traffic uses the public network interface.
83121
// Set this to a specific IP address (e.g., "10.0.0.1") only if you have configured
84122
// a NAT gateway or similar infrastructure on the private network.
85123
// +optional
86-
GatewayAddress *string `json:"gatewayAddress,omitempty"`
124+
GatewayAddress string `json:"gatewayAddress,omitempty"`
87125
}
88126

89127
// LoadBalancerSpec defines the load balancer configuration for the control plane.
90128
type LoadBalancerSpec struct {
91129
// Enabled controls whether a load balancer is created for the control plane.
92130
// Set to false for external control planes (e.g., hosted control plane) where the endpoint
93-
// is provided externally.
131+
// is provided externally, or when using a floating IP without a load balancer.
94132
// +kubebuilder:default=true
95133
// +optional
96134
Enabled *bool `json:"enabled,omitempty"`
@@ -113,6 +151,17 @@ type LoadBalancerSpec struct {
113151
// +optional
114152
APIServerPort int32 `json:"apiServerPort,omitempty"`
115153

154+
// Network places the LB VIP on a private network (internal LB).
155+
// References spec.networks[].name. Omit for a public LB.
156+
// +optional
157+
Network string `json:"network,omitempty"`
158+
159+
// IPFamily specifies the IP family for the LB VIP address(es).
160+
// +kubebuilder:validation:Enum=IPv4;IPv6;DualStack
161+
// +kubebuilder:default=DualStack
162+
// +optional
163+
IPFamily IPFamily `json:"ipFamily,omitempty"`
164+
116165
// HealthMonitor configures the load balancer health monitor.
117166
// +optional
118167
HealthMonitor HealthMonitorSpec `json:"healthMonitor,omitempty"`
@@ -149,19 +198,38 @@ type HealthMonitorSpec struct {
149198
DownThreshold int `json:"downThreshold,omitempty"`
150199
}
151200

201+
// NOTE(review): both fields below are +optional and no
// +kubebuilder:validation:XValidation (CEL) marker is visible on this type, so the
// "exactly one of IPFamily or UUID" rule is presumably enforced elsewhere
// (e.g. a validating webhook) — confirm.

// FloatingIPSpec configures a floating IP for the control plane endpoint.
202+
// Exactly one of IPFamily or UUID must be specified.
203+
type FloatingIPSpec struct {
204+
// IPFamily creates a new floating IP with this IP version.
205+
// A floating IP is a single address, so DualStack is not valid here.
206+
// Mutually exclusive with UUID.
207+
// +kubebuilder:validation:Enum=IPv4;IPv6
208+
// +optional
209+
IPFamily *IPFamily `json:"ipFamily,omitempty"` // pointer: nil means "not set"
210+
211+
// UUID references an existing floating IP (BYO).
212+
// The floating IP is not deleted on cluster teardown.
213+
// Mutually exclusive with IPFamily.
214+
// +optional
215+
UUID string `json:"uuid,omitempty"`
216+
}
217+
152218
// CloudscaleClusterStatus defines the observed state of CloudscaleCluster.
153219
type CloudscaleClusterStatus struct {
154220
// Initialization contains v1beta2 initialization tracking.
155221
// +optional
156222
Initialization *ClusterInitializationStatus `json:"initialization,omitempty"`
157223

158-
// NetworkID is the cloudscale.ch network UUID.
224+
// Networks track the status of each network defined in spec.networks.
225+
// +listType=map
226+
// +listMapKey=name
159227
// +optional
160-
NetworkID string `json:"networkID,omitempty"`
228+
Networks []NetworkStatus `json:"networks,omitempty"`
161229

162-
// SubnetID is the cloudscale.ch subnet UUID.
230+
// FloatingIP is the cloudscale.ch floating IP.
163231
// +optional
164-
SubnetID string `json:"subnetID,omitempty"`
232+
FloatingIP string `json:"floatingIP,omitempty"`
165233

166234
// LoadBalancerID is the cloudscale.ch load balancer UUID.
167235
// +optional
@@ -184,20 +252,30 @@ type CloudscaleClusterStatus struct {
184252
LoadBalancerMemberIDs []string `json:"loadBalancerMemberIDs,omitempty"`
185253

186254
// conditions represent the current state of the CloudscaleCluster resource.
187-
// Each condition has a unique type and reflects the status of a specific aspect of the resource.
188-
//
189-
// Standard condition types include:
190-
// - "Available": the resource is fully functional
191-
// - "Progressing": the resource is being created or updated
192-
// - "Degraded": the resource failed to reach or maintain its desired state
193-
//
194-
// The status of each condition is one of True, False, or Unknown.
195255
// +listType=map
196256
// +listMapKey=type
197257
// +optional
198258
Conditions []metav1.Condition `json:"conditions,omitempty"`
199259
}
200260

261+
// Entries are keyed by Name: CloudscaleClusterStatus.Networks is declared with
// +listType=map / +listMapKey=name, and GetNetworkStatus looks entries up by it.

// NetworkStatus tracks the provisioned state of a single network.
262+
type NetworkStatus struct {
263+
// Name matches the logical name from spec.networks[].name.
264+
Name string `json:"name"`
265+
266+
// NetworkID is the cloudscale.ch network UUID.
267+
// +optional
268+
NetworkID string `json:"networkID,omitempty"`
269+
270+
// SubnetID is the cloudscale.ch subnet UUID.
271+
// +optional
272+
SubnetID string `json:"subnetID,omitempty"`
273+
274+
// Managed indicates whether CAPCS manages this network's lifecycle.
275+
// false for BYO networks (referenced by UUID), true for CAPCS-created networks (defined by CIDR).
276+
Managed bool `json:"managed"` // no omitempty: false is always serialized
277+
}
278+
201279
// ClusterInitializationStatus contains v1beta2 initialization tracking for CloudscaleCluster.
202280
type ClusterInitializationStatus struct {
203281
// Provisioned indicates that all cluster infrastructure has been provisioned.
@@ -206,6 +284,16 @@ type ClusterInitializationStatus struct {
206284
Provisioned *bool `json:"provisioned,omitempty"`
207285
}
208286

287+
// GetNetworkStatus returns the NetworkStatus for the given network name, or nil if not found.
//
// The lookup is a linear scan over s.Networks and returns the first entry whose
// Name equals name. The returned pointer aliases the element inside s.Networks,
// so writes through it update the status in place; it no longer refers to the
// live entry if s.Networks is subsequently reallocated (e.g. by append).
// The receiver s must be non-nil.
288+
func (s *CloudscaleClusterStatus) GetNetworkStatus(name string) *NetworkStatus {
289+
for i := range s.Networks {
290+
if s.Networks[i].Name == name {
291+
// Index into the slice (rather than using a range value) so the
// pointer targets the stored element, not a per-iteration copy.
return &s.Networks[i]
292+
}
293+
}
294+
return nil
295+
}
296+
209297
// +kubebuilder:object:root=true
210298
// +kubebuilder:subresource:status
211299
// +kubebuilder:resource:path=cloudscaleclusters,scope=Namespaced,categories=cluster-api

0 commit comments

Comments
 (0)