Skip to content

Commit 7053fde

Browse files
authored
feat(dsl): 3-tier VDG local analysis replacing line-proximity (PR-04) (#595)
Part 4/8 of V5 QueryType × VDG Integration. CFG-aware VDG → Flat VDG → Line-proximity fallback.
1 parent 965dca0 commit 7053fde

File tree

4 files changed

+190
-26
lines changed

4 files changed

+190
-26
lines changed

sast-engine/dsl/bridge_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,92 @@ func TestNewDataflowExecutor_InitializesDefaults(t *testing.T) {
204204
t.Error("CallGraph should be set")
205205
}
206206
}
207+
208+
func TestConfidenceForMethod(t *testing.T) {
209+
executor := NewDataflowExecutor(&DataflowIR{}, core.NewCallGraph())
210+
211+
tests := []struct {
212+
method string
213+
expected float64
214+
}{
215+
{"cfg_vdg", 0.95},
216+
{"flat_vdg", 0.85},
217+
{"interprocedural_vdg", 0.80},
218+
{"line_proximity", 0.50},
219+
{"unknown", 0.60},
220+
}
221+
222+
for _, tc := range tests {
223+
got := executor.confidenceForMethod(tc.method)
224+
if got != tc.expected {
225+
t.Errorf("confidenceForMethod(%q) = %v, want %v", tc.method, got, tc.expected)
226+
}
227+
}
228+
}
229+
230+
func TestExecuteLocal_LegacyFallback(t *testing.T) {
231+
// No Statements populated → should fall back to line_proximity
232+
cg := core.NewCallGraph()
233+
cg.CallSites["test.handler"] = []core.CallSite{
234+
{Target: "os.getenv", Location: core.Location{Line: 1}},
235+
{Target: "eval", Location: core.Location{Line: 5}},
236+
}
237+
// Intentionally NOT setting cg.Statements
238+
239+
ir := &DataflowIR{
240+
Sources: toRawMessages(CallMatcherIR{Type: "call_matcher", Patterns: []string{"os.getenv"}}),
241+
Sinks: toRawMessages(CallMatcherIR{Type: "call_matcher", Patterns: []string{"eval"}}),
242+
Sanitizers: emptyRawMessages(),
243+
Scope: "local",
244+
}
245+
246+
executor := NewDataflowExecutor(ir, cg)
247+
detections := executor.executeLocal()
248+
249+
if len(detections) != 1 {
250+
t.Fatalf("expected 1 detection from legacy fallback, got %d", len(detections))
251+
}
252+
if detections[0].MatchMethod != "line_proximity" {
253+
t.Errorf("expected MatchMethod 'line_proximity', got %q", detections[0].MatchMethod)
254+
}
255+
if detections[0].Confidence != 0.50 {
256+
t.Errorf("expected Confidence 0.50, got %v", detections[0].Confidence)
257+
}
258+
}
259+
260+
func TestExecuteLocal_VDGAnalysis(t *testing.T) {
261+
// With Statements populated → should use VDG
262+
funcFQN := "test.module.handler"
263+
cg := core.NewCallGraph()
264+
cg.CallSites[funcFQN] = []core.CallSite{
265+
{Target: "os.getenv", Location: core.Location{Line: 1}},
266+
{Target: "eval", Location: core.Location{Line: 2}},
267+
}
268+
cg.Statements[funcFQN] = []*core.Statement{
269+
makeTestAssignStmt(1, "x", "os.getenv", []string{}),
270+
makeTestCallStmt(2, "eval", []string{"x"}),
271+
}
272+
273+
ir := &DataflowIR{
274+
Sources: toRawMessages(CallMatcherIR{Type: "call_matcher", Patterns: []string{"os.getenv"}}),
275+
Sinks: toRawMessages(CallMatcherIR{Type: "call_matcher", Patterns: []string{"eval"}}),
276+
Sanitizers: emptyRawMessages(),
277+
Scope: "local",
278+
}
279+
280+
executor := NewDataflowExecutor(ir, cg)
281+
detections := executor.executeLocal()
282+
283+
if len(detections) != 1 {
284+
t.Fatalf("expected 1 detection from VDG, got %d", len(detections))
285+
}
286+
if detections[0].MatchMethod != "flat_vdg" {
287+
t.Errorf("expected MatchMethod 'flat_vdg', got %q", detections[0].MatchMethod)
288+
}
289+
if detections[0].Confidence != 0.85 {
290+
t.Errorf("expected Confidence 0.85, got %v", detections[0].Confidence)
291+
}
292+
if detections[0].TaintedVar == "" {
293+
t.Error("expected TaintedVar to be set")
294+
}
295+
}

sast-engine/dsl/dataflow_executor.go

Lines changed: 97 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,13 @@ func (e *DataflowExecutor) Execute() []DataflowDetection {
4141
return e.executeGlobal()
4242
}
4343

44-
// executeLocal performs intra-procedural taint analysis.
44+
// executeLocal performs intra-procedural taint analysis with 3-tier fallback:
45+
// Tier 1: CFG-aware VDG (highest confidence) — uses control flow graph + variable dependency graph
46+
// Tier 2: Flat VDG — uses variable dependency graph without CFG
47+
// Tier 3: Line-number proximity (legacy fallback) — when no statements available.
4548
func (e *DataflowExecutor) executeLocal() []DataflowDetection {
4649
detections := []DataflowDetection{}
4750

48-
// Resolve matchers polymorphically.
4951
sourceCalls := e.resolveMatchers(e.IR.Sources)
5052
if len(sourceCalls) == 0 {
5153
e.Diagnostics.Addf("debug", "dataflow", "0 sources found, skipping local analysis")
@@ -60,45 +62,123 @@ func (e *DataflowExecutor) executeLocal() []DataflowDetection {
6062

6163
sanitizerCalls := e.resolveMatchers(e.IR.Sanitizers)
6264

63-
// For local scope, check if source and sink are in the same function.
65+
sourcePatterns := e.extractTargetPatterns(sourceCalls)
66+
sinkPatterns := e.extractTargetPatterns(sinkCalls)
67+
sanitizerPatterns := e.extractTargetPatterns(sanitizerCalls)
68+
69+
candidateFuncs := e.findFunctionsWithSourcesAndSinks(sourceCalls, sinkCalls)
70+
71+
for _, funcFQN := range candidateFuncs {
72+
stmts := e.getStatementsForFunction(funcFQN)
73+
if len(stmts) == 0 {
74+
// Tier 3: Legacy line-number proximity (no statements available)
75+
e.executeLocalLegacy(funcFQN, sourceCalls, sinkCalls, sanitizerCalls, &detections)
76+
continue
77+
}
78+
79+
// Tier 1: CFG-aware VDG
80+
analysisMethod := "flat_vdg"
81+
var summary *core.TaintSummary
82+
83+
if raw, exists := e.CallGraph.CFGs[funcFQN]; exists {
84+
if cfGraph, ok := raw.(*cfg.ControlFlowGraph); ok {
85+
if rawBS, bsExists := e.CallGraph.CFGBlockStatements[funcFQN]; bsExists {
86+
if blockStmts, bsOK := rawBS.(cfg.BlockStatements); bsOK && len(blockStmts) > 0 {
87+
summary = taint.AnalyzeWithCFG(funcFQN, cfGraph, blockStmts,
88+
sourcePatterns, sinkPatterns, sanitizerPatterns)
89+
analysisMethod = "cfg_vdg"
90+
}
91+
}
92+
}
93+
}
94+
95+
// Tier 2: Flat VDG (if Tier 1 found no detections)
96+
if summary == nil || !summary.HasDetections() {
97+
summary = taint.AnalyzeWithVDG(funcFQN, stmts,
98+
sourcePatterns, sinkPatterns, sanitizerPatterns)
99+
analysisMethod = "flat_vdg"
100+
}
101+
102+
if summary != nil {
103+
for _, det := range summary.Detections {
104+
detections = append(detections, DataflowDetection{
105+
FunctionFQN: funcFQN,
106+
SourceLine: int(det.SourceLine),
107+
SinkLine: int(det.SinkLine),
108+
TaintedVar: det.SourceVar,
109+
SinkCall: det.SinkCall,
110+
Confidence: e.confidenceForMethod(analysisMethod),
111+
Sanitized: false,
112+
Scope: "local",
113+
MatchMethod: analysisMethod,
114+
})
115+
}
116+
}
117+
}
118+
119+
return detections
120+
}
121+
122+
// confidenceForMethod returns the confidence score for a given analysis method.
123+
func (e *DataflowExecutor) confidenceForMethod(method string) float64 {
124+
switch method {
125+
case "cfg_vdg":
126+
return 0.95
127+
case "flat_vdg":
128+
return 0.85
129+
case "interprocedural_vdg":
130+
return 0.80
131+
case "line_proximity":
132+
return 0.50
133+
default:
134+
return 0.60
135+
}
136+
}
137+
138+
// executeLocalLegacy performs line-number proximity analysis (Tier 3 fallback).
139+
// Used when no statements are available for a function.
140+
func (e *DataflowExecutor) executeLocalLegacy(
141+
funcFQN string,
142+
sourceCalls, sinkCalls, sanitizerCalls []CallSiteMatch,
143+
detections *[]DataflowDetection,
144+
) {
64145
for _, source := range sourceCalls {
146+
if source.FunctionFQN != funcFQN {
147+
continue
148+
}
65149
for _, sink := range sinkCalls {
66-
if source.FunctionFQN != sink.FunctionFQN {
150+
if sink.FunctionFQN != funcFQN {
67151
continue
68152
}
69153

70154
hasSanitizer := false
71-
for _, sanitizer := range sanitizerCalls {
72-
if sanitizer.FunctionFQN == source.FunctionFQN {
73-
if (sanitizer.Line > source.Line && sanitizer.Line < sink.Line) ||
74-
(sanitizer.Line > sink.Line && sanitizer.Line < source.Line) {
155+
for _, san := range sanitizerCalls {
156+
if san.FunctionFQN == funcFQN {
157+
if (san.Line > source.Line && san.Line < sink.Line) ||
158+
(san.Line > sink.Line && san.Line < source.Line) {
75159
hasSanitizer = true
76160
break
77161
}
78162
}
79163
}
80-
81164
if hasSanitizer {
82165
continue
83166
}
84167

85-
detection := DataflowDetection{
86-
FunctionFQN: source.FunctionFQN,
168+
*detections = append(*detections, DataflowDetection{
169+
FunctionFQN: funcFQN,
87170
SourceLine: source.Line,
88171
SourceColumn: source.CallSite.Location.Column,
89172
SinkLine: sink.Line,
90173
SinkColumn: sink.CallSite.Location.Column,
91174
SinkCall: sink.CallSite.Target,
92-
Confidence: e.Config.getLocalScopeConfidence(),
175+
Confidence: 0.50,
93176
Sanitized: false,
94177
Scope: "local",
95-
}
96-
97-
detections = append(detections, detection)
178+
MatchMethod: "line_proximity",
179+
})
98180
}
99181
}
100-
101-
return detections
102182
}
103183

104184
// executeGlobal performs inter-procedural taint analysis.

sast-engine/dsl/dataflow_executor_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ func TestDataflowExecutor_Local(t *testing.T) {
6363
assert.Equal(t, 10, detections[0].SinkLine)
6464
assert.Equal(t, "execute", detections[0].SinkCall)
6565
assert.Equal(t, "local", detections[0].Scope)
66-
assert.Equal(t, 0.7, detections[0].Confidence)
66+
assert.Equal(t, 0.50, detections[0].Confidence)
67+
assert.Equal(t, "line_proximity", detections[0].MatchMethod)
6768
assert.False(t, detections[0].Sanitized)
6869
})
6970

sast-engine/dsl/dataflow_executor_vdg_test.go

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,8 @@ func TestVDGIntegration_Case4_SanitizerKills(t *testing.T) {
162162
}
163163
}
164164

165-
// TestVDGIntegration_Case5_UnrelatedVars tests: x = source(); sink(y) -> NO DETECT
166-
// Skip: requires VDG variable tracking (PR-04). Line-proximity executor detects this as a false positive.
165+
// TestVDGIntegration_Case5_UnrelatedVars tests: x = source(); sink(y) -> NO DETECT.
167166
func TestVDGIntegration_Case5_UnrelatedVars(t *testing.T) {
168-
t.Skip("Requires VDG variable tracking (PR-04): line-proximity cannot distinguish unrelated variables")
169167

170168
funcFQN := "test.module.case_unrelated"
171169
stmts := []*core.Statement{
@@ -193,10 +191,8 @@ func TestVDGIntegration_Case5_UnrelatedVars(t *testing.T) {
193191
}
194192
}
195193

196-
// TestVDGIntegration_Case6_ReassignmentKills tests: x = source(); x = "safe"; sink(x) -> NO DETECT
197-
// Skip: requires VDG variable tracking (PR-04). Line-proximity executor cannot track reassignment.
194+
// TestVDGIntegration_Case6_ReassignmentKills tests: x = source(); x = "safe"; sink(x) -> NO DETECT.
198195
func TestVDGIntegration_Case6_ReassignmentKills(t *testing.T) {
199-
t.Skip("Requires VDG variable tracking (PR-04): line-proximity cannot track reassignment kills")
200196

201197
funcFQN := "test.module.case_reassign"
202198
stmts := []*core.Statement{
@@ -331,7 +327,6 @@ func TestVDGIntegration_Scorecard(t *testing.T) {
331327
{Target: "eval", Location: core.Location{Line: 2}},
332328
},
333329
expectDetected: false,
334-
skip: "Requires VDG variable tracking (PR-04)",
335330
},
336331
{
337332
name: "6. Reassignment kills taint",
@@ -345,7 +340,6 @@ func TestVDGIntegration_Scorecard(t *testing.T) {
345340
{Target: "eval", Location: core.Location{Line: 3}},
346341
},
347342
expectDetected: false,
348-
skip: "Requires VDG variable tracking (PR-04)",
349343
},
350344
{
351345
name: "7. Multi-hop transitive flow",

0 commit comments

Comments
 (0)