Skip to content

Commit 9171950

Browse files
committed
fixes from review
1 parent e3b9fb2 commit 9171950

File tree

16 files changed

+178
-75
lines changed

16 files changed

+178
-75
lines changed

cmd/thv-operator/api/v1alpha1/virtualmcpserver_types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,13 @@ type OperationalConfig struct {
388388
// FailureHandling configures failure handling behavior
389389
// +optional
390390
FailureHandling *FailureHandlingConfig `json:"failureHandling,omitempty"`
391+
392+
// CapabilityCacheTTL is the time-to-live for cached capability entries.
393+
// Defaults to 5 minutes if not specified.
394+
// Shorter values (e.g., "10s") can be used for testing to see capability updates sooner.
395+
// +kubebuilder:default="5m"
396+
// +optional
397+
CapabilityCacheTTL string `json:"capabilityCacheTTL,omitempty"`
391398
}
392399

393400
// TimeoutConfig configures timeout settings

cmd/thv-operator/controllers/virtualmcpserver_controller.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,7 +1128,10 @@ func countBackendHealth(ctx context.Context, backends []mcpv1alpha1.DiscoveredBa
11281128
return ready, unhealthy
11291129
}
11301130

1131-
// determineStatusFromBackends evaluates backend health to determine status
1131+
// determineStatusFromBackends evaluates backend health to determine status.
1132+
// This function should ONLY be called when health monitoring is enabled
1133+
// (spec.operational.failureHandling.healthCheckInterval is set).
1134+
// When health monitoring is disabled, backend health status is meaningless.
11321135
func (*VirtualMCPServerReconciler) determineStatusFromBackends(
11331136
ctx context.Context,
11341137
vmcp *mcpv1alpha1.VirtualMCPServer,
@@ -1222,9 +1225,14 @@ func (r *VirtualMCPServerReconciler) determineStatusFromPods(
12221225
// Get current discovered backends from status manager (may be updated but not applied yet)
12231226
discoveredBackends := statusManager.GetDiscoveredBackends()
12241227

1225-
// Pods are ready (passed readiness probes) - check backend health if backends exist
1226-
if len(discoveredBackends) == 0 {
1227-
// No backends discovered yet - pods ready is sufficient for Ready
1228+
// Check if health monitoring is enabled
1229+
healthMonitoringEnabled := vmcp.Spec.Operational != nil &&
1230+
vmcp.Spec.Operational.FailureHandling != nil &&
1231+
vmcp.Spec.Operational.FailureHandling.HealthCheckInterval != ""
1232+
1233+
// Pods are ready (passed readiness probes) - check backend health if backends exist AND health monitoring is enabled
1234+
if len(discoveredBackends) == 0 || !healthMonitoringEnabled {
1235+
// No backends discovered yet OR health monitoring disabled - pods ready is sufficient for Ready
12281236
return statusDecision{
12291237
phase: mcpv1alpha1.VirtualMCPServerPhaseReady,
12301238
message: "Virtual MCP server is running",
@@ -1234,7 +1242,7 @@ func (r *VirtualMCPServerReconciler) determineStatusFromPods(
12341242
}
12351243
}
12361244

1237-
// Backends exist - determine health status using current backends from status manager
1245+
// Backends exist AND health monitoring enabled - determine health status using current backends from status manager
12381246
return r.determineStatusFromBackends(ctx, vmcp, discoveredBackends)
12391247
}
12401248

cmd/thv-operator/pkg/vmcpconfig/converter.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,5 +921,12 @@ func (*Converter) convertOperational(
921921
}
922922
}
923923

924+
// Parse capability cache TTL
925+
if vmcp.Spec.Operational.CapabilityCacheTTL != "" {
926+
if duration, err := time.ParseDuration(vmcp.Spec.Operational.CapabilityCacheTTL); err == nil {
927+
operational.CapabilityCacheTTL = vmcpconfig.Duration(duration)
928+
}
929+
}
930+
924931
return operational
925932
}

cmd/vmcp/app/commands.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,16 @@ func runServe(cmd *cobra.Command, _ []string) error {
295295
agg := aggregator.NewDefaultAggregator(backendClient, conflictResolver, cfg.Aggregation.Tools)
296296

297297
// Create discovery manager for lazy per-user capability discovery
298-
discoveryMgr, err := discovery.NewManager(agg)
298+
// Use configured cache TTL if available, otherwise use default
299+
var discoveryMgr discovery.Manager
300+
if cfg.Operational != nil && cfg.Operational.CapabilityCacheTTL > 0 {
301+
cacheTTL := time.Duration(cfg.Operational.CapabilityCacheTTL)
302+
logger.Debugf("Creating discovery manager with custom cache TTL: %v", cacheTTL)
303+
discoveryMgr, err = discovery.NewManagerWithTTL(agg, cacheTTL)
304+
} else {
305+
logger.Debugf("Creating discovery manager with default cache TTL: %v", discovery.DefaultCacheTTL)
306+
discoveryMgr, err = discovery.NewManager(agg)
307+
}
299308
if err != nil {
300309
return fmt.Errorf("failed to create discovery manager: %w", err)
301310
}

deploy/charts/operator-crds/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: toolhive-operator-crds
33
description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
44
type: application
5-
version: 0.0.88
5+
version: 0.0.89
66
appVersion: "0.0.1"

deploy/charts/operator-crds/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ToolHive Operator CRDs Helm Chart
22

3-
![Version: 0.0.88](https://img.shields.io/badge/Version-0.0.88-informational?style=flat-square)
3+
![Version: 0.0.89](https://img.shields.io/badge/Version-0.0.89-informational?style=flat-square)
44
![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
55

66
A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.

deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,13 @@ spec:
644644
description: Operational defines operational settings like timeouts
645645
and health checks
646646
properties:
647+
capabilityCacheTTL:
648+
default: 5m
649+
description: |-
650+
CapabilityCacheTTL is the time-to-live for cached capability entries.
651+
Defaults to 5 minutes if not specified.
652+
Shorter values (e.g., "10s") can be used for testing to see capability updates sooner.
653+
type: string
647654
failureHandling:
648655
description: FailureHandling configures failure handling behavior
649656
properties:

deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,13 @@ spec:
647647
description: Operational defines operational settings like timeouts
648648
and health checks
649649
properties:
650+
capabilityCacheTTL:
651+
default: 5m
652+
description: |-
653+
CapabilityCacheTTL is the time-to-live for cached capability entries.
654+
Defaults to 5 minutes if not specified.
655+
Shorter values (e.g., "10s") can be used for testing to see capability updates sooner.
656+
type: string
650657
failureHandling:
651658
description: FailureHandling configures failure handling behavior
652659
properties:

docs/operator/crd-api.md

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/vmcp/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,11 @@ type OperationalConfig struct {
242242

243243
// FailureHandling configures failure handling.
244244
FailureHandling *FailureHandlingConfig `json:"failure_handling,omitempty" yaml:"failure_handling,omitempty"`
245+
246+
// CapabilityCacheTTL is the time-to-live for cached capability entries.
247+
// Defaults to 5 minutes if not specified or zero.
248+
// Shorter values (e.g., 10s) can be used for testing to see capability updates sooner.
249+
CapabilityCacheTTL Duration `json:"capability_cache_ttl,omitempty" yaml:"capability_cache_ttl,omitempty"`
245250
}
246251

247252
// TimeoutConfig configures timeouts.

0 commit comments

Comments
 (0)