-
Notifications
You must be signed in to change notification settings - Fork 24
fix: improve dynamic member access fallback and dataflow continuity #113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
699a0b6
86f3473
812b3a0
9d11215
30fdbdc
d46215b
e61be5f
f32f9fb
2067880
b08bead
d23f87b
9f58641
fbd5978
6c0fcd4
4aad909
92b1953
dc7583b
b96962a
3cceb62
c8ff9dc
39669e6
8d19c17
61c8b5f
f84b50f
15da4c1
2682e6a
fee55b1
a1721c2
f4428da
0d63baf
e20920a
8244780
73ae7ef
6f50c2a
d3ac17a
3ec065b
e94be76
a8d9c60
fe20571
9c4d96f
8bd420d
b9f0fa1
ec630ee
9b559bb
a058641
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| [ | ||
| { | ||
| "checkerIds": ["taint_flow_python_input"], | ||
| "sources": { | ||
| "FuncCallReturnValueTaintSource": [ | ||
| { | ||
| "fsig": "input", | ||
| "scopeFile": "all", | ||
| "scopeFunc": "all" | ||
| } | ||
| ] | ||
| }, | ||
| "sinks": { | ||
| "FuncCallTaintSink": [ | ||
| { | ||
| "fsig": "sqlalchemy.text", | ||
| "args": ["0"], | ||
| "attribute": "SQLInjection" | ||
| }, | ||
| { | ||
| "fsig": "text", | ||
| "args": ["0"], | ||
| "attribute": "SQLInjection" | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -259,7 +259,11 @@ class PythonAnalyzer extends (Analyzer as any) { | |||||||||||
| einfo: state.einfo, | ||||||||||||
| }) | ||||||||||||
|
|
||||||||||||
| const fclos = this.processInstruction(scope, node.callee, state) | ||||||||||||
| let fclos = this.processInstruction(scope, node.callee, state) | ||||||||||||
| const callFallback = this.tryResolveMemberCallFallback(scope, node, state, fclos) | ||||||||||||
| if (callFallback) { | ||||||||||||
| fclos = callFallback | ||||||||||||
| } | ||||||||||||
| if (node?.callee?.type === 'MemberAccess' && fclos.fdef && node.callee?.object?.type !== 'SuperExpression') { | ||||||||||||
| fclos._this = this.processInstruction(scope, node.callee.object, state) | ||||||||||||
| } | ||||||||||||
|
|
@@ -336,6 +340,7 @@ class PythonAnalyzer extends (Analyzer as any) { | |||||||||||
| return | ||||||||||||
| } | ||||||||||||
| const res = this.executeCall(node, fclos, argvalues, state, scope) | ||||||||||||
| this.propagateTaintFromMemberReceiver(node, fclos, res) | ||||||||||||
|
|
||||||||||||
| if (fclos.vtype !== 'fclos' && Config.invokeCallbackOnUnknownFunction) { | ||||||||||||
| this.executeFunctionInArguments(scope, fclos, node, argvalues, state) | ||||||||||||
|
|
@@ -355,6 +360,90 @@ class PythonAnalyzer extends (Analyzer as any) { | |||||||||||
| return res | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Fallback for member calls unresolved as symbol/undefine. | ||||||||||||
| * It binds the method by name from loaded class definitions to current receiver. | ||||||||||||
| */ | ||||||||||||
| tryResolveMemberCallFallback(scope: any, node: any, state: any, currentFclos: any) { | ||||||||||||
| if (!node || node?.callee?.type !== 'MemberAccess') { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
| if (!currentFclos || !['symbol', 'undefine', 'uninitialized'].includes(currentFclos.vtype)) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
| const prop = node.callee.property | ||||||||||||
| if (!prop || prop.type !== 'Identifier' || !prop.name) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const receiver = this.processInstruction(scope, node.callee.object, state) | ||||||||||||
| if (!receiver) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const preferredClassNames: string[] = [] | ||||||||||||
| const receiverName = `${receiver?.id || ''}.${receiver?.sid || ''}.${receiver?.qid || ''}` | ||||||||||||
| if (receiverName.includes('SelectAction')) { | ||||||||||||
| preferredClassNames.push('SelectAction') | ||||||||||||
| } | ||||||||||||
| if (receiverName.includes('QueryAction')) { | ||||||||||||
| preferredClassNames.push('QueryAction') | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| for (const className of preferredClassNames) { | ||||||||||||
| const cls = this.findClassInModules(className) | ||||||||||||
| const method = cls?.value?.[prop.name] | ||||||||||||
| if (method?.vtype === 'fclos') { | ||||||||||||
| const methodCopy = _.clone(method) | ||||||||||||
| methodCopy._this = receiver | ||||||||||||
| methodCopy.parent = receiver | ||||||||||||
| methodCopy.object = receiver | ||||||||||||
| methodCopy._qid = `${receiver?.qid || receiver?.sid || className}.${prop.name}` | ||||||||||||
| return methodCopy | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const modules = this.moduleManager?.field || {} | ||||||||||||
| for (const modKey of Object.keys(modules)) { | ||||||||||||
| const modScope = modules[modKey] | ||||||||||||
| for (const key of Object.keys(modScope?.value || {})) { | ||||||||||||
| const cls = modScope.value[key] | ||||||||||||
| if (cls?.vtype !== 'class') continue | ||||||||||||
| const method = cls?.value?.[prop.name] | ||||||||||||
| if (method?.vtype === 'fclos') { | ||||||||||||
| const methodCopy = _.clone(method) | ||||||||||||
| methodCopy._this = receiver | ||||||||||||
| methodCopy.parent = receiver | ||||||||||||
| methodCopy.object = receiver | ||||||||||||
| methodCopy._qid = `${receiver?.qid || receiver?.sid || key}.${prop.name}` | ||||||||||||
| return methodCopy | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
Comment on lines
+407
to
+422
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This global search for a method across all classes in all modules is computationally expensive. Since this fallback is triggered for unresolved member calls, it could significantly degrade performance in large projects. Consider implementing a global method index or caching the results of this search to improve efficiency. |
||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Preserve taint on return value for member calls when receiver already carries taint. | ||||||||||||
| * This helps Python dynamic method chains like str.split(...)->list[index] keep dataflow. | ||||||||||||
| */ | ||||||||||||
| propagateTaintFromMemberReceiver(node: any, fclos: any, ret: any) { | ||||||||||||
| if (!ret || !node || node?.callee?.type !== 'MemberAccess') { | ||||||||||||
| return | ||||||||||||
| } | ||||||||||||
| const receiver = fclos?.getThis ? fclos.getThis() : fclos?._this | ||||||||||||
| if (!receiver || !(receiver.hasTagRec || AstUtil.hasTag(receiver, ''))) { | ||||||||||||
| return | ||||||||||||
| } | ||||||||||||
| ret.hasTagRec = true | ||||||||||||
| if (!ret._tags && receiver._tags) { | ||||||||||||
| ret._tags = _.clone(receiver._tags) | ||||||||||||
| } | ||||||||||||
| if (!ret.trace && receiver.trace) { | ||||||||||||
| ret.trace = _.clone(receiver.trace) | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * | ||||||||||||
| * @param scope | ||||||||||||
|
|
@@ -561,13 +650,124 @@ class PythonAnalyzer extends (Analyzer as any) { | |||||||||||
| resolved_prop.name = '_CTOR_' | ||||||||||||
| } | ||||||||||||
| if (!resolved_prop) return defscope | ||||||||||||
| const res = this.getMemberValue(defscope, resolved_prop, state) | ||||||||||||
| let res = this.getMemberValue(defscope, resolved_prop, state) | ||||||||||||
| const taintedIndexFallback = this.tryResolveTaintedIndexFallback(node, defscope, resolved_prop, res) | ||||||||||||
| if (taintedIndexFallback) { | ||||||||||||
| res = taintedIndexFallback | ||||||||||||
| } | ||||||||||||
| const fallbackObject = this.tryResolveObjectsManagerFallback(defscope, resolved_prop, res) | ||||||||||||
| if (fallbackObject) { | ||||||||||||
| res = fallbackObject | ||||||||||||
| } | ||||||||||||
| if (this.checkerManager && (this.checkerManager as any).checkAtMemberAccess) { | ||||||||||||
| this.checkerManager.checkAtMemberAccess(this, defscope, node, state, { res }) | ||||||||||||
| } | ||||||||||||
| return res | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Python negative-index / computed-index fallback: | ||||||||||||
| * when member lookup fails on a tainted sequence-like value, preserve taint on indexed result. | ||||||||||||
| * This keeps flows like `parts = s.split('__'); parts[-1]` alive even when list elements are not concretely modeled. | ||||||||||||
| */ | ||||||||||||
| tryResolveTaintedIndexFallback(node: any, defscope: any, resolvedProp: any, currentRes: any) { | ||||||||||||
| const unresolved = !currentRes || ['symbol', 'undefine', 'uninitialized'].includes(currentRes.vtype) | ||||||||||||
| if (!unresolved || !defscope) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const isComputed = !!node?.computed | ||||||||||||
| const propType = resolvedProp?.type || resolvedProp?.ast?.type | ||||||||||||
| const isIndexLikeProp = | ||||||||||||
| isComputed || propType === 'UnaryExpression' || propType === 'Literal' || propType === 'primitive' | ||||||||||||
| if (!isIndexLikeProp) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| if (!(defscope.hasTagRec || AstUtil.hasTag(defscope, ''))) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const fallback = SymbolValue({ | ||||||||||||
| type: 'MemberAccess', | ||||||||||||
| object: defscope, | ||||||||||||
| property: resolvedProp, | ||||||||||||
| ast: node, | ||||||||||||
| sid: `${defscope?.sid || defscope?.id || 'obj'}[idx]`, | ||||||||||||
| qid: `${defscope?.qid || defscope?.sid || defscope?.id || 'obj'}[idx]`, | ||||||||||||
| }) | ||||||||||||
| fallback.hasTagRec = true | ||||||||||||
| if (!fallback._tags && defscope._tags) { | ||||||||||||
| fallback._tags = _.clone(defscope._tags) | ||||||||||||
| } | ||||||||||||
| if (!fallback.trace && defscope.trace) { | ||||||||||||
| fallback.trace = _.clone(defscope.trace) | ||||||||||||
| } | ||||||||||||
| return fallback | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Try resolving dynamic ORM-style "<Model>.objects" to QuerySet-like object when direct member lookup fails. | ||||||||||||
| */ | ||||||||||||
| tryResolveObjectsManagerFallback(defscope: any, resolved_prop: any, currentRes: any) { | ||||||||||||
| if (!resolved_prop || resolved_prop.type !== 'Identifier' || resolved_prop.name !== 'objects') { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
| if (!defscope || defscope.vtype !== 'class') { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
| const unresolved = !currentRes || ['symbol', 'undefine', 'uninitialized'].includes(currentRes.vtype) | ||||||||||||
| if (!unresolved) { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| const querySetClass = this.findClassInModules('QuerySet') | ||||||||||||
| if (!querySetClass || querySetClass.vtype !== 'class') { | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| return this.cloneClassMethodsAsObject(querySetClass, `${defscope?.id || 'Model'}_objects`) | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Find a class symbol by name from processed python modules. | ||||||||||||
| */ | ||||||||||||
| findClassInModules(className: string) { | ||||||||||||
| const modules = this.moduleManager?.field || {} | ||||||||||||
| for (const modKey of Object.keys(modules)) { | ||||||||||||
| const modScope = modules[modKey] | ||||||||||||
| const candidate = modScope?.value?.[className] | ||||||||||||
| if (candidate?.vtype === 'class') { | ||||||||||||
| return candidate | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
| return undefined | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * Create a lightweight object view from class methods to model descriptor-returned manager objects. | ||||||||||||
| */ | ||||||||||||
| cloneClassMethodsAsObject(classClos: any, objectName: string) { | ||||||||||||
| const obj = ObjectValue({ | ||||||||||||
| id: objectName, | ||||||||||||
| sid: objectName, | ||||||||||||
| qid: objectName, | ||||||||||||
| parent: classClos?.parent || this.topScope, | ||||||||||||
| ast: classClos?.ast, | ||||||||||||
| }) | ||||||||||||
| obj._this = obj | ||||||||||||
| obj.vtype = 'object' | ||||||||||||
| for (const fieldName in classClos?.value || {}) { | ||||||||||||
| const v = classClos.value[fieldName] | ||||||||||||
| if (!v) continue | ||||||||||||
| const vCopy = _.clone(v) | ||||||||||||
| vCopy._this = obj | ||||||||||||
| vCopy.parent = obj | ||||||||||||
| obj.value[fieldName] = vCopy | ||||||||||||
| } | ||||||||||||
| return obj | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * | ||||||||||||
| * @param ast | ||||||||||||
|
|
@@ -681,9 +881,33 @@ class PythonAnalyzer extends (Analyzer as any) { | |||||||||||
| processOperator(scope: any, node: any, argvalues: any, operator: any, state: any) { | ||||||||||||
| switch (operator) { | ||||||||||||
| case 'push': { | ||||||||||||
| this.saveVarInCurrentScope(scope, node, argvalues, state) | ||||||||||||
| const has_tag = (scope && scope.hasTagRec) || (argvalues && argvalues.hasTagRec) | ||||||||||||
| // Python list-comprehension in UAST lowers to temp-list + push operations. | ||||||||||||
| // We need append semantics here instead of overwriting temp variable each time. | ||||||||||||
| let container = this.getMemberValueNoCreate(scope, node, state) | ||||||||||||
| if (!container || ['undefine', 'uninitialized', 'symbol'].includes(container.vtype)) { | ||||||||||||
| const sid = node?.name || node?.sid || '__tmp__' | ||||||||||||
| container = ObjectValue({ | ||||||||||||
| id: sid, | ||||||||||||
| sid, | ||||||||||||
| qid: scope?.qid ? `${scope.qid}.${sid}` : sid, | ||||||||||||
| parent: scope, | ||||||||||||
| }) | ||||||||||||
| container.vtype = 'object' | ||||||||||||
| container._this = container | ||||||||||||
| } | ||||||||||||
| if (!container.value || typeof container.value !== 'object') { | ||||||||||||
| container.value = {} | ||||||||||||
| } | ||||||||||||
| const numericIndices = Object.keys(container.value) | ||||||||||||
| .filter((k) => /^\d+$/.test(k)) | ||||||||||||
| .map((k) => Number(k)) | ||||||||||||
| const nextIndex = numericIndices.length > 0 ? Math.max(...numericIndices) + 1 : 0 | ||||||||||||
|
Comment on lines
+901
to
+904
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Calculating the next numeric index by filtering all keys and finding the maximum value on every
Suggested change
|
||||||||||||
| container.value[String(nextIndex)] = argvalues | ||||||||||||
| this.saveVarInCurrentScope(scope, node, container, state) | ||||||||||||
|
|
||||||||||||
| const has_tag = (scope && scope.hasTagRec) || (argvalues && argvalues.hasTagRec) || container?.hasTagRec | ||||||||||||
| if (has_tag) { | ||||||||||||
| container.hasTagRec = has_tag | ||||||||||||
| scope.hasTagRec = has_tag | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The receiver object
node.callee.objecthas already been evaluated during the processing ofnode.callee(which callsprocessMemberAccess). Re-evaluating it here is redundant and could lead to incorrect analysis if the receiver expression has side effects (e.g., a function call). It is safer and more efficient to retrieve the already evaluated receiver fromcurrentFclos.objectif available.