Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions TOOLS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ This MCP server exposes the following tools for interacting with Prometheus/Than
**Usage Tips:**

- YOU MUST CALL THIS TOOL BEFORE ANY OTHER QUERY TOOL
- This tool MUST be called first for EVERY observability question to: 1. Discover what metrics actually exist in this environment 2. Find the EXACT metric name to use in queries 3. Avoid querying non-existent metrics
- This tool MUST be called first for EVERY observability question to: 1. Discover what metrics actually exist in this environment 2. Find the EXACT metric name to use in queries 3. Avoid querying non-existent metrics 4. The 'name_regex' parameter should always be provided, and be a best guess of what the metric would be named like. 5. Do not use a blanket regex like .* or .+ in the 'name_regex' parameter. Use specific ones like kube.*, node.*, etc.
- NEVER skip this step. NEVER guess metric names. Metric names vary between environments.
- After calling this tool: 1. Search the returned list for relevant metrics 2. Use the EXACT metric name found in subsequent queries 3. If no relevant metric exists, inform the user

| | |
| :------------- | :--- |
| **Parameters** | None |
**Parameters:**

| Parameter | Type | Required | Description |
| :----------- | :------- | :------: | :------------------------------------------------------------------------------------------------------------------------------------ |
| `name_regex` | `string` | ✅ | Regex pattern to filter metric names (e.g., 'http_.*', 'node_.*', 'kube.*'). This parameter is required. Don't pass in blanket regex. |

**Output Schema:**

Expand Down
2 changes: 1 addition & 1 deletion pkg/mcp/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func ListMetricsHandler(opts ObsMCPOptions) func(context.Context, mcp.CallToolRe
return mcp.NewToolResultError(fmt.Sprintf("failed to create Prometheus client: %s", err.Error())), nil
}

return tools.ListMetricsHandler(ctx, promClient).ToMCPResult()
return tools.ListMetricsHandler(ctx, promClient, tools.BuildListMetricsInput(req.GetArguments())).ToMCPResult()
}
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/mcp/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ import (

// MockedLoader is a mock implementation of prometheus.Loader for testing.
type MockedLoader struct {
ListMetricsFunc func(ctx context.Context) ([]string, error)
ListMetricsFunc func(ctx context.Context, nameRegex string) ([]string, error)
ExecuteRangeQueryFunc func(ctx context.Context, query string, start, end time.Time, step time.Duration) (map[string]any, error)
ExecuteInstantQueryFunc func(ctx context.Context, query string, time time.Time) (map[string]any, error)
GetLabelNamesFunc func(ctx context.Context, metricName string, start, end time.Time) ([]string, error)
GetLabelValuesFunc func(ctx context.Context, label string, metricName string, start, end time.Time) ([]string, error)
GetSeriesFunc func(ctx context.Context, matches []string, start, end time.Time) ([]map[string]string, error)
}

func (m *MockedLoader) ListMetrics(ctx context.Context) ([]string, error) {
func (m *MockedLoader) ListMetrics(ctx context.Context, nameRegex string) ([]string, error) {
if m.ListMetricsFunc != nil {
return m.ListMetricsFunc(ctx)
return m.ListMetricsFunc(ctx, nameRegex)
}
return []string{}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/mcp/tools_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func TestToolParameters(t *testing.T) {
}{
{
tool: CreateListMetricsTool(),
expectedRequired: []string{},
expectedRequired: []string{"name_regex"},
expectedOptional: []string{},
},
{
Expand Down
17 changes: 13 additions & 4 deletions pkg/prometheus/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const (

// Loader defines the interface for querying Prometheus
type Loader interface {
ListMetrics(ctx context.Context) ([]string, error)
ListMetrics(ctx context.Context, nameRegex string) ([]string, error)
ExecuteRangeQuery(ctx context.Context, query string, start, end time.Time, step time.Duration) (map[string]any, error)
ExecuteInstantQuery(ctx context.Context, query string, time time.Time) (map[string]any, error)
GetLabelNames(ctx context.Context, metricName string, start, end time.Time) ([]string, error)
Expand Down Expand Up @@ -54,8 +54,16 @@ func (p *RealLoader) WithGuardrails(g *Guardrails) *RealLoader {
return p
}

func (p *RealLoader) ListMetrics(ctx context.Context) ([]string, error) {
labelValues, _, err := p.client.LabelValues(ctx, "__name__", []string{}, time.Now().Add(-ListMetricsTimeRange), time.Now())
func (p *RealLoader) ListMetrics(ctx context.Context, nameRegex string) ([]string, error) {
var matches []string

// Blanket patterns such as ".*" are sent with no matcher at all: this still returns every metric and avoids the 4xx Prometheus returns for an empty matcher.
if nameRegex != ".*" && nameRegex != ".+" && nameRegex != "" {
matcher := fmt.Sprintf("{__name__=~\"%s\"}", nameRegex) //nolint:gocritic
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with this fix, we don't need the additional if anymore, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do, because Prometheus treats .* as an empty matcher and fails the request with {"status":"error","errorType":"bad_data","error":"match[] must contain at least one non-empty matcher"}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are absolutely correct :)

matches = []string{matcher}
}

labelValues, _, err := p.client.LabelValues(ctx, "__name__", matches, time.Now().Add(-ListMetricsTimeRange), time.Now())
if err != nil {
return nil, fmt.Errorf("error fetching metric names: %w", err)
}
Expand All @@ -81,7 +89,8 @@ func (p *RealLoader) ValidateMetricsExist(ctx context.Context, query string) err
return nil
}

availableMetricsList, err := p.ListMetrics(ctx)
// Use ".*" to match all metrics for validation
availableMetricsList, err := p.ListMetrics(ctx, ".*")
if err != nil {
return fmt.Errorf("failed to fetch available metrics: %w", err)
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/tools/definitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ var (
Name: "list_metrics",
Description: ListMetricsPrompt,
Title: "List Available Metrics",
Params: []ParamDef{}, // no parameters
Params: []ParamDef{
{
Name: "name_regex",
Type: ParamTypeString,
Description: "Regex pattern to filter metric names (e.g., 'http_.*', 'node_.*', 'kube.*'). This parameter is required. Don't pass in blanket regex.",
Required: true,
},
},
ReadOnly: true,
Destructive: false,
Idempotent: true,
Expand Down
16 changes: 14 additions & 2 deletions pkg/tools/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ func GetBoolPtr(params map[string]any, key string) *bool {
return nil
}

// BuildListMetricsInput assembles a ListMetricsInput from raw tool-call
// arguments. NameRegex is read from the "name_regex" key through GetString,
// with "" passed as its third argument (presumably the fallback used when the
// key is missing or not a string — confirm against GetString).
func BuildListMetricsInput(args map[string]any) ListMetricsInput {
	var input ListMetricsInput
	input.NameRegex = GetString(args, "name_regex", "")
	return input
}

func BuildInstantQueryInput(args map[string]any) InstantQueryInput {
return InstantQueryInput{
Query: GetString(args, "query", ""),
Expand Down Expand Up @@ -95,10 +101,16 @@ func BuildSilencesInput(args map[string]any) SilencesInput {
}

// ListMetricsHandler handles the listing of available Prometheus metrics.
func ListMetricsHandler(ctx context.Context, promClient prometheus.Loader) *resultutil.Result {
func ListMetricsHandler(ctx context.Context, promClient prometheus.Loader, input ListMetricsInput) *resultutil.Result {
slog.Info("ListMetricsHandler called")
slog.Debug("ListMetricsHandler params", "input", input)

// Validate required parameters
if input.NameRegex == "" {
return resultutil.NewErrorResult(fmt.Errorf("name_regex parameter is required and must be a string"))
}

metrics, err := promClient.ListMetrics(ctx)
metrics, err := promClient.ListMetrics(ctx, input.NameRegex)
if err != nil {
slog.Error("failed to list metrics", "error", err)
return resultutil.NewErrorResult(fmt.Errorf("failed to list metrics: %w", err))
Expand Down
3 changes: 3 additions & 0 deletions pkg/tools/prompt.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ If the user mentions a specific alert by name, use get_alerts with a filter to r
- This is NON-NEGOTIABLE for EVERY question
- NEVER skip this step, even if you think you know the metric name
- NEVER guess metric names - they vary between environments
- Always pass in a name_regex param to it with a best guess of what the metric would be named like.
- Search the returned list to find the exact metric name that exists

**STEP 2: Call get_label_names for the metric you found**
Expand Down Expand Up @@ -48,6 +49,8 @@ This tool MUST be called first for EVERY observability question to:
1. Discover what metrics actually exist in this environment
2. Find the EXACT metric name to use in queries
3. Avoid querying non-existent metrics
4. The 'name_regex' parameter should always be provided, and be a best guess of what the metric would be named like.
5. Do not use a blanket regex like .* or .+ in the 'name_regex' parameter. Use specific ones like kube.*, node.*, etc.

NEVER skip this step. NEVER guess metric names. Metric names vary between environments.

Expand Down
5 changes: 5 additions & 0 deletions pkg/tools/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ type Matcher struct {

// Input structs for handler parameters

// ListMetricsInput defines the input parameters for ListMetricsHandler.
type ListMetricsInput struct {
// NameRegex holds the value of the "name_regex" tool argument and is
// encoded under that same key in JSON.
NameRegex string `json:"name_regex"`
}

// RangeQueryInput defines the input parameters for ExecuteRangeQueryHandler.
type RangeQueryInput struct {
Query string `json:"query"`
Expand Down
2 changes: 1 addition & 1 deletion pkg/toolset/tools/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func ListMetricsHandler(params api.ToolHandlerParams) (*api.ToolCallResult, erro
return api.NewToolCallResult("", fmt.Errorf("failed to create Prometheus client: %w", err)), nil
}

return tools.ListMetricsHandler(params.Context, promClient).ToToolsetResult()
return tools.ListMetricsHandler(params.Context, promClient, tools.BuildListMetricsInput(params.GetArguments())).ToToolsetResult()
}

// ExecuteInstantQueryHandler handles the execution of Prometheus instant queries.
Expand Down
29 changes: 19 additions & 10 deletions tests/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,34 @@ func TestHealthEndpoint(t *testing.T) {
}
}

func TestListMetrics(t *testing.T) {
resp, err := mcpClient.CallTool(t, 1, "list_metrics", map[string]any{})
func TestListMetricsReturnsKnownMetrics(t *testing.T) {
resp, err := mcpClient.CallTool(t, 2, "list_metrics", map[string]any{
"name_regex": ".*",
})
if err != nil {
t.Fatalf("Failed to call list_metrics: %v", err)
}

if resp.Error != nil {
t.Errorf("MCP error: %s", resp.Error.Message)
t.Fatalf("MCP error: %s", resp.Error.Message)
}

if resp.Result == nil {
t.Error("Expected result, got nil")
}
// Verify known metrics from kube-prometheus are present
resultJSON, _ := json.Marshal(resp.Result)
resultStr := string(resultJSON)

t.Logf("list_metrics returned successfully")
expectedMetrics := []string{"up", "prometheus_build_info"}
for _, metric := range expectedMetrics {
if !strings.Contains(resultStr, metric) {
t.Errorf("Expected metric %q not found in results", metric)
}
}
}

func TestListMetricsReturnsKnownMetrics(t *testing.T) {
resp, err := mcpClient.CallTool(t, 2, "list_metrics", map[string]any{})
func TestListMetricsReturnsKnownMetricsWithMatcher(t *testing.T) {
resp, err := mcpClient.CallTool(t, 2, "list_metrics", map[string]any{
"name_regex": "prometheus.*",
})
if err != nil {
t.Fatalf("Failed to call list_metrics: %v", err)
}
Expand All @@ -97,7 +106,7 @@ func TestListMetricsReturnsKnownMetrics(t *testing.T) {
resultJSON, _ := json.Marshal(resp.Result)
resultStr := string(resultJSON)

expectedMetrics := []string{"up", "prometheus_build_info"}
expectedMetrics := []string{"prometheus_build_info"}
for _, metric := range expectedMetrics {
if !strings.Contains(resultStr, metric) {
t.Errorf("Expected metric %q not found in results", metric)
Expand Down