stacklok · therealnb · Jan 13, 2026 · Jan 15, 2026 · Jan 19, 2026 · Jan 19, 2026
diff --git a/.gitignore b/.gitignore
@@ -42,4 +42,11 @@ cmd/thv-operator/.task/checksum/crdref-gen
 # Test coverage
 coverage*
 
-crd-helm-wrapper
+crd-helm-wrapper
+cmd/vmcp/__debug_bin*
+
+# Demo files
+examples/operator/virtual-mcps/vmcp_optimizer.yaml
+scripts/k8s_vmcp_optimizer_demo.sh
+examples/ingress/mcp-servers-ingress.yaml
+/vmcp
diff --git a/.golangci.yml b/.golangci.yml
@@ -139,6 +139,7 @@ linters:
       - third_party$
       - builtin$
       - examples$
+      - scripts$
 formatters:
   enable:
     - gci
@@ -155,3 +156,4 @@ formatters:
       - third_party$
       - builtin$
       - examples$
+      - scripts$
diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go
@@ -1250,12 +1250,13 @@ func (r *MCPServerReconciler) deploymentForMCPServer(
 				Spec: corev1.PodSpec{
 					ServiceAccountName: ctrlutil.ProxyRunnerServiceAccountName(m.Name),
 					Containers: []corev1.Container{{
-						Image:        getToolhiveRunnerImage(),
-						Name:         "toolhive",
-						Args:         args,
-						Env:          env,
-						VolumeMounts: volumeMounts,
-						Resources:    resources,
+						Image:           getToolhiveRunnerImage(),
+						Name:            "toolhive",
+						ImagePullPolicy: getImagePullPolicyForToolhiveRunner(),
+						Args:            args,
+						Env:             env,
+						VolumeMounts:    volumeMounts,
+						Resources:       resources,
 						Ports: []corev1.ContainerPort{{
 							ContainerPort: m.GetProxyPort(),
 							Name:          "http",
@@ -1813,6 +1814,19 @@ func getToolhiveRunnerImage() string {
 	return image
 }
 
+// getImagePullPolicyForToolhiveRunner returns the appropriate imagePullPolicy for the toolhive runner container.
+// If the image is a local image (starts with "kind.local/" or "localhost/"), use Never.
+// Otherwise, use IfNotPresent to allow pulling when needed but avoid unnecessary pulls.
+func getImagePullPolicyForToolhiveRunner() corev1.PullPolicy {
+	image := getToolhiveRunnerImage()
+	// Check if it's a local image that should use Never
+	if strings.HasPrefix(image, "kind.local/") || strings.HasPrefix(image, "localhost/") {
+		return corev1.PullNever
+	}
+	// For other images, use IfNotPresent to allow pulling when needed
+	return corev1.PullIfNotPresent
+}
+
 // handleExternalAuthConfig validates and tracks the hash of the referenced MCPExternalAuthConfig.
 // It updates the MCPServer status when the external auth configuration changes.
 func (r *MCPServerReconciler) handleExternalAuthConfig(ctx context.Context, m *mcpv1alpha1.MCPServer) error {

diff --git a/cmd/thv-operator/pkg/optimizer/INTEGRATION.md b/cmd/thv-operator/pkg/optimizer/INTEGRATION.md
@@ -0,0 +1,134 @@
+# Integrating Optimizer with vMCP
+
+## Overview
+
+The optimizer package ingests MCP server and tool metadata into a searchable database with semantic embeddings. This enables intelligent tool discovery and token optimization for LLM consumption.
+
+## Integration Approach
+
+**Event-Driven Ingestion**: The optimizer integrates directly with vMCP's startup process. When vMCP starts and loads its configured servers, it calls the optimizer to ingest each server's metadata and tools.
+
+❌ **NOT** a separate polling service discovering backends
+✅ **IS** called directly by vMCP during server initialization
+
+## How It Is Integrated
+
+The optimizer is already integrated into vMCP and works automatically when enabled via configuration. Here's how the integration works:
+
+### Initialization
+
+When vMCP starts with optimizer enabled in the configuration, it:
+
+1. Initializes the optimizer database (chromem-go + SQLite FTS5)
+2. Configures the embedding backend (placeholder, Ollama, or vLLM)
+3. Sets up the ingestion service
+
+### Automatic Ingestion
+
+The optimizer integrates with vMCP's `OnRegisterSession` hook, which is called whenever:
+
+- vMCP starts and loads configured MCP servers
+- A new MCP server is dynamically added
+- A session reconnects or refreshes
+
+When this hook is triggered, the optimizer:
+
+1. Retrieves the server's metadata and tools via MCP protocol
+2. Generates embeddings for searchable content
+3. Stores the data in both the vector database (chromem-go) and FTS5 database
+4. Makes the tools immediately available for semantic search
+
+### Exposed Tools
+
+When the optimizer is enabled, vMCP automatically exposes these tools to LLM clients:
+
+- `optim.find_tool`: Semantic search for tools across all registered servers
+- `optim.call_tool`: Dynamic tool invocation after discovery
+
+### Implementation Location
+
+The integration code is located in:
+- `cmd/vmcp/optimizer.go`: Optimizer initialization and configuration
+- `pkg/vmcp/optimizer/optimizer.go`: Session registration hook implementation
+- `cmd/thv-operator/pkg/optimizer/ingestion/service.go`: Core ingestion service
+
+## Configuration
+
+Add optimizer configuration to vMCP's config:
+
+```yaml
+# vMCP config
+optimizer:
+  enabled: true
+  db_path: /data/optimizer.db
+  embedding:
+    backend: vllm  # or "ollama" for local dev, "placeholder" for testing
+    url: http://vllm-service:8000
+    model: sentence-transformers/all-MiniLM-L6-v2
+    dimension: 384
+```
+
+## Error Handling
+
+**Important**: Optimizer failures should NOT break vMCP functionality:
+
+- ✅ Log warnings if optimizer fails
+- ✅ Continue server startup even if ingestion fails
+- ✅ Run ingestion in goroutines to avoid blocking
+- ❌ Don't fail server startup if optimizer is unavailable
+
+## Benefits
+
+1. **Automatic**: Servers are indexed as they're added to vMCP
+2. **Up-to-date**: Database reflects current vMCP state
+3. **No polling**: Event-driven, efficient
+4. **Semantic search**: Enables intelligent tool discovery
+5. **Token optimization**: Tracks token usage for LLM efficiency
+
+## Testing
+
+```go
+func TestOptimizerIntegration(t *testing.T) {
+    // Initialize optimizer
+    optimizerSvc, err := ingestion.NewService(&ingestion.Config{
+        DBConfig: &db.Config{Path: "/tmp/test-optimizer.db"},
+        EmbeddingConfig: &embeddings.Config{
+            BackendType: "ollama",
+            BaseURL:     "http://localhost:11434",
+            Model:       "all-minilm",
+            Dimension:   384,
+            Dimension:   384,
+        },
+    })
+    require.NoError(t, err)
+    defer optimizerSvc.Close()
+
+    // Simulate vMCP starting a server
+    ctx := context.Background()
+    tools := []mcp.Tool{
+        {Name: "get_weather", Description: "Get current weather"},
+        {Name: "get_forecast", Description: "Get weather forecast"},
+    }
+
+    err = optimizerSvc.IngestServer(
+        ctx,
+        "weather-001",
+        "weather-service",
+        "http://weather.local",
+        models.TransportSSE,
+        ptr("Weather information service"),
+        tools,
+    )
+    require.NoError(t, err)
+
+    // Verify ingestion
+    server, err := optimizerSvc.GetServer(ctx, "weather-001")
+    require.NoError(t, err)
+    assert.Equal(t, "weather-service", server.Name)
+}
+```
+
+## See Also
+
+- [Optimizer Package README](./README.md) - Package overview and API
+