diff --git a/resource/checker/checker-config.json b/resource/checker/checker-config.json
index 5e28f533..b84019a0 100644
--- a/resource/checker/checker-config.json
+++ b/resource/checker/checker-config.json
@@ -135,6 +135,16 @@
     "checkerPath": "checker/taint/python/django-taint-checker.ts",
     "description": "python Django框架 entrypoint采集以及框架source添加"
   },
+  {
+    "checkerId": "taint_flow_python_tornado_input",
+    "checkerPath": "checker/taint/python/tornado-taint-checker.ts",
+    "description": "python Tornado框架 entrypoint采集以及框架source添加"
+  },
+  {
+    "checkerId": "pickle_deserialization_checker",
+    "checkerPath": "checker/taint/python/pickle-deserialization-checker.ts",
+    "description": "Python Pickle反序列化漏洞检测器 (CVE-2024-11041)"
+  },
   {
     "checkerId": "taint_flow_test",
     "checkerPath": "checker/taint/test-taint-checker.ts",
@@ -161,4 +171,4 @@
     "checkerPath": "checker/sdk/get-ast-source-code-checker.ts",
     "description": "获取AST对应的源码"
   }
-]
\ No newline at end of file
+]
diff --git a/resource/checker/checker-pack-config.json b/resource/checker/checker-pack-config.json
index 2380e197..580fdd35 100644
--- a/resource/checker/checker-pack-config.json
+++ b/resource/checker/checker-pack-config.json
@@ -86,6 +86,7 @@
     "checkerIds": [
       "taint_flow_python_input",
       "taint_flow_python_django_input",
+      "taint_flow_python_tornado_input",
       "callgraph",
       "sanitizer"
     ],
@@ -96,6 +97,7 @@
     "checkerIds": [
       "taint_flow_python_input_inner",
       "taint_flow_python_django_input",
+      "taint_flow_python_tornado_input",
       "callgraph",
       "sanitizer"
     ],
diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json
index 93de362d..1b866057 100644
--- a/resource/example-rule-config/rule_config_python.json
+++ b/resource/example-rule-config/rule_config_python.json
@@ -1,6 +1,11 @@
 [
   {
-    "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"],
+    "checkerIds": [
+      "taint_flow_python_input",
+      "taint_flow_python_input_inner",
+      "taint_flow_python_django_input",
+      "taint_flow_python_tornado_input"
+    ],
     "sources": {
       "FuncCallReturnValueTaintSource": [
         {
diff --git a/src/checker/taint/python/pickle-deserialization-checker.ts b/src/checker/taint/python/pickle-deserialization-checker.ts
new file mode 100755
index 00000000..8ab19f9e
--- /dev/null
+++ b/src/checker/taint/python/pickle-deserialization-checker.ts
@@ -0,0 +1,129 @@
+const IntroduceTaint = require('../common-kit/source-util')
+const TaintChecker = require('../taint-checker')
+const TaintOutputStrategy = require('../../common/output/taint-output-strategy')
+const CommonUtil = require('../../../util/common-util')
+
+const TAINT_TAG = 'CROSS_BOUNDARY_DATA'
+const CHECKER_ID = 'pickle_deserialization_checker'
+
+/**
+ * Reports calls to `pickle.loads` whose first argument carries the CROSS_BOUNDARY_DATA tag.
+ */
+class PickleDeserializationChecker extends TaintChecker {
+  entryPoints: any[]
+
+  /**
+   * Creates the checker under CHECKER_ID, starts with no entry points and clears its source/sink rules.
+   * @param resultManager forwarded to the base checker and used later to report findings
+   */
+  constructor(resultManager: any) {
+    super(resultManager, CHECKER_ID)
+    this.entryPoints = []
+    this.initRules()
+  }
+
+  /**
+   * Resets the rule config's `sources` and `sinks` to empty objects (sources and sinks are hard-coded below).
+   */
+  initRules(): void {
+    this.checkerRuleConfigContent.sources = {}
+    this.checkerRuleConfigContent.sinks = {}
+  }
+
+  /**
+   * Collects entry points and appends all of them to `analyzer.entryPoints`.
+   * @param analyzer analyzer whose `entryPoints` list is extended
+   */
+  triggerAtStartOfAnalyze(analyzer: any): void {
+    this.prepareEntryPoints(analyzer)
+    analyzer.entryPoints.push(...this.entryPoints)
+  }
+
+  /**
+   * Builds the full call graph, gathers call-graph and file-level entry points, then initialises the source scope.
+   * @param analyzer provides `ainfo.callgraph` and `fileManager` to the entry-point helper
+   */
+  prepareEntryPoints(analyzer: any): void {
+    const epHelper = require('../../common/full-callgraph-file-entrypoint')
+    epHelper.makeFullCallGraph(analyzer)
+    this.entryPoints.push(...epHelper.getAllEntryPointsUsingCallGraph(analyzer.ainfo?.callgraph))
+    this.entryPoints.push(...epHelper.getAllFileEntryPointsUsingFileManager(analyzer.fileManager))
+
+    CommonUtil.initSourceScopeByTaintSourceWithLoc(this.sourceScope, this.checkerRuleConfigContent.sources?.TaintSource)
+  }
+
+  /**
+   * Sink check: emits a finding when the call is `pickle.loads` and its first argument is tagged.
+   * @param analyzer unused
+   * @param scope unused
+   * @param node call AST node tested by isSink
+   * @param state unused
+   * @param info supplies `fclos` and `argvalues` for the call
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    const { fclos, argvalues } = info
+    if (this.isSink(node)) {
+      const arg0 = argvalues?.[0]
+      if (arg0?._tags?.has(TAINT_TAG)) {
+        const finding = this.buildTaintFinding(
+          CHECKER_ID,
+          this.desc,
+          node,
+          arg0,
+          fclos,
+          TAINT_TAG,
+          'pickle.loads\nSINK Attribute: UnsafeDeserialization',
+          [],
+          undefined
+        )
+        if (TaintOutputStrategy.isNewFinding(this.resultManager, finding)) {
+          this.resultManager.newFinding(finding, TaintOutputStrategy.outputStrategyId)
+        }
+      }
+    }
+  }
+
+  /**
+   * Tags the return value of source calls, and of any call made on an already-tagged receiver.
+   * @param analyzer unused
+   * @param scope unused
+   * @param node call AST node tested by isSource
+   * @param state unused
+   * @param info supplies `fclos` (callee closure) and `ret` (return value)
+   */
+  triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    const { fclos, ret } = info
+
+    if (this.isSource(node) && ret) {
+      IntroduceTaint.setTaint(ret, [TAINT_TAG])
+      ret.trace = ret.trace || []
+      ret.trace.push({ node, type: 'SOURCE' })
+    }
+    if (fclos?.object?._tags?.has(TAINT_TAG) && ret) {
+      IntroduceTaint.setTaint(ret, [TAINT_TAG])
+      if (fclos.object.trace && !ret.trace) {
+        ret.trace = fclos.object.trace
+      }
+    }
+  }
+
+  /**
+   * True when the callee is a member access whose property is named `socket` (e.g. `socket.socket(...)`).
+   * @param node call AST node
+   */
+  isSource(node: any): boolean {
+    const callee = node?.callee
+    return callee?.type === 'MemberAccess' && callee?.property?.name === 'socket'
+  }
+
+  /**
+   * True when the callee is exactly `pickle.loads`. NOTE(review): `pickle.load` is not covered.
+   * @param node call AST node
+   */
+  isSink(node: any): boolean {
+    const callee = node?.callee
+    return callee?.type === 'MemberAccess' && callee?.object?.name === 'pickle' && callee?.property?.name === 'loads'
+  }
+}
+
+module.exports = PickleDeserializationChecker
diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts
new file mode 100644
index 00000000..c34c055d
--- /dev/null
+++ b/src/checker/taint/python/tornado-taint-checker.ts
@@ -0,0 +1,290 @@
+const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+const Config = require('../../../config')
+const completeEntryPoint = require('../common-kit/entry-points-util')
+const { markTaintSource } = require('../common-kit/source-util')
+const { isTornadoCall, tornadoSourceAPIs, isRequestAttributeAccess, extractTornadoParams } = require('./tornado-util')
+const { extractRelativePath } = require('../../../util/file-util')
+
+// Metadata recorded in triggerAtFunctionCallAfter: app/router -> routes, rule -> { path, handler }, matcher -> path
+const tornadoRoutesMap = new WeakMap()
+const tornadoRouteMap = new WeakMap()
+const tornadoPathMap = new WeakMap()
+
+/**
+ * Tornado taint checker: registers handler methods found in route tables as entry points and marks request inputs.
+ */
+class TornadoTaintChecker extends PythonTaintAbstractChecker {
+  /**
+   * Registers the checker under the id 'taint_flow_python_tornado_input'.
+   * @param resultManager forwarded to the base checker
+   */
+  constructor(resultManager: any) {
+    super(resultManager, 'taint_flow_python_tornado_input')
+  }
+
+  /**
+   * Adds the PYTHON_INPUT source tag to this checker's rule config; the hook arguments are not used.
+   * @param analyzer
+   * @param scope
+   * @param node
+   * @param state
+   * @param info
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent)
+  }
+
+  /**
+   * On Application / RuleRouter / add_handlers calls, walks the routes argument and registers handler entry points.
+   * @param analyzer
+   * @param scope
+   * @param node call AST node
+   * @param state
+   * @param info supplies `fclos` and `argvalues`; nothing is done when either is missing
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info)
+    const { fclos, argvalues } = info
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return
+    const isApp = isTornadoCall(node, 'Application')
+    const isRouter = isTornadoCall(node, 'RuleRouter')
+    const isAdd = isTornadoCall(node, 'add_handlers')
+    if (isApp || isRouter || isAdd) {
+      let routes: any = null
+      if (isApp || isRouter) {
+        const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name)
+        routes = (isInit && argvalues[1]) || argvalues[0]
+      } else {
+        routes = argvalues[1] // isAdd case
+      }
+      if (routes) {
+        this.registerRoutesFromValue(analyzer, scope, state, routes)
+      }
+    }
+  }
+
+  /**
+   * Register routes from a collection value (List/Dict/Union/Single Symbol)
+   * @param analyzer
+   * @param scope
+   * @param state
+   * @param val symbolic value to inspect; falsy values are ignored
+   * @param prefix path prefix prepended to every route found under `val`
+   */
+  private registerRoutesFromValue(analyzer: any, scope: any, state: any, val: any, prefix = '') {
+    if (!val) return
+    // 1. Handle recording optimization (tornadoRoute); a non-string path falls through to the structural cases
+    if (tornadoRouteMap.has(val)) {
+      const { path, handler } = tornadoRouteMap.get(val)!
+      if (typeof path === 'string' && path && handler) {
+        this.finishRoute(analyzer, scope, state, handler, prefix + path)
+        return
+      }
+    }
+    // 2. Handle Union
+    if (val.vtype === 'union' && Array.isArray(val.value)) {
+      // Small optimization: if this union contains exactly a string and something else, it might be a flattened tuple
+      const pathVal = val.value.find(
+        (v: any) => tornadoPathMap.has(v) || typeof v?.value === 'string' || typeof v?.ast?.value === 'string'
+      )
+      const hVal = val.value.find((v: any) => v?.vtype === 'class' || v?.vtype === 'symbol' || v?.vtype === 'object')
+      if (pathVal && hVal) {
+        const path = tornadoPathMap.get(pathVal) || pathVal.value || pathVal.ast?.value
+        if (typeof path === 'string') {
+          this.finishRoute(analyzer, scope, state, hVal, prefix + path)
+          return
+        }
+      }
+      val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix))
+      return
+    }
+    // 3. Handle raw tuple (path, handler)
+    if (val.value && typeof val.value === 'object') {
+      const pathArg = val.value['0']
+      const handler = val.value['1']
+      const path = (pathArg && tornadoPathMap.get(pathArg)) || pathArg?.value || pathArg?.ast?.value
+      if (typeof path === 'string' && handler) {
+        this.finishRoute(analyzer, scope, state, handler, prefix + path)
+        return
+      }
+    }
+    // 4. Handle Collections (List/Object with numeric keys)
+    const isObject = val.vtype === 'object' && val.value
+    if (isObject) {
+      const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k))
+      if (isCollection) {
+        const items = Array.isArray(val.value) ? val.value : Object.values(val.value)
+        items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix))
+      }
+    }
+  }
+
+  /**
+   * Resolves a route target: recurses into recorded nested routes, otherwise registers the handler class.
+   * @param analyzer
+   * @param scope
+   * @param state
+   * @param h route target (handler class, instance symbol, nested app/router, or a union whose first member is used)
+   * @param path full path for this route; also used as the prefix for nested routes
+   */
+  private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) {
+    if (h?.vtype === 'union' && Array.isArray(h.value)) h = h.value[0]
+    if (!h) return
+    // 1. Check for recorded nested routes (Application/Router instances)
+    const innerRoutes = tornadoRoutesMap.get(h) || (h.value && tornadoRoutesMap.get(h.value))
+    if (innerRoutes) {
+      this.registerRoutesFromValue(analyzer, scope, state, innerRoutes, path)
+      return
+    }
+    // 2. Handle Class Definition (Handler classes)
+    let cls = h
+    if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') {
+      try {
+        cls = analyzer.processInstruction(scope, cls.ast, state) || this.buildClassSymbol(cls.ast)
+      } catch (e) {
+        cls = this.buildClassSymbol(cls.ast)
+      }
+    } else if (cls.vtype === 'symbol' && cls.cdef) {
+      // If it's an instance symbol, get its class definition
+      cls = cls.cdef
+    }
+    if (path && cls && (cls.vtype === 'class' || cls.vtype === 'symbol')) {
+      this.registerEntryPoints(analyzer, cls, path)
+    }
+  }
+
+  /**
+   * Adds the class's HTTP-verb methods as entry points and marks URL-captured parameters as PYTHON_INPUT sources.
+   * @param analyzer analyzer whose `entryPoints` list is extended (duplicates are skipped)
+   * @param cls handler class value
+   * @param path URL pattern used to decide which parameters are captured from the URL
+   */
+  private registerEntryPoints(analyzer: any, cls: any, path: string) {
+    const methods = ['get', 'head', 'post', 'delete', 'patch', 'put', 'options'] // Tornado's SUPPORTED_METHODS
+    // Look for methods in cls.value, cls.field, or cls.value.field (Python specificity)
+    const classValue = cls.value?.field || cls.field || cls.value || {}
+    Object.entries(classValue).forEach(([name, fclos]: [string, any]) => {
+      if (methods.includes(name)) {
+        const ep = completeEntryPoint(fclos)
+        if (ep) {
+          ep.funcReceiverType = cls.ast?.id?.name || cls.sid || 'Unknown'
+          const isDuplicate = analyzer.entryPoints.some(
+            (existing: any) =>
+              existing.functionName === ep.functionName &&
+              existing.filePath === ep.filePath &&
+              existing.funcReceiverType === ep.funcReceiverType
+          )
+          if (!isDuplicate) {
+            analyzer.entryPoints.push(ep)
+          }
+          const info = extractTornadoParams(path)
+          let paramIdx = 0
+          const actualParams = (fclos?.fdef?.parameters || fclos?.ast?.parameters || []) as any[]
+          actualParams.forEach((p: any) => {
+            const pName = p.id?.name || p.name
+            if (pName === 'self') return
+            paramIdx++
+            // Add source scope for parameters based on URL pattern
+            if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) {
+              this.sourceScope.value.push({
+                path: pName,
+                kind: 'PYTHON_INPUT',
+                scopeFile: extractRelativePath(fclos?.ast?.loc?.sourcefile || ep.filePath, Config.maindir),
+                scopeFunc: ep.functionName,
+                locStart: p.loc?.start?.line,
+                locEnd: p.loc?.end?.line,
+              })
+            }
+          })
+        }
+      }
+    })
+  }
+
+  /**
+   * Fallback: builds a minimal class value (method name -> fclos stub) directly from a ClassDefinition AST node.
+   * @param node ClassDefinition AST node; only its FunctionDefinition members are kept
+   */
+  private buildClassSymbol(node: any) {
+    const value: any = {}
+    node.body?.forEach((m: any) => {
+      if (m.type === 'FunctionDefinition') {
+        const name = m.id?.name || m.name?.name
+        if (name) {
+          value[name] = {
+            vtype: 'fclos',
+            fdef: m,
+            ast: m,
+          }
+        }
+      }
+    })
+    return { vtype: 'class', value, ast: node }
+  }
+
+  /**
+   * Records route metadata from Rule/URLSpec/url, PathMatches and Application/RuleRouter calls; taints getter returns.
+   * @param analyzer
+   * @param scope
+   * @param node call AST node
+   * @param state
+   * @param info supplies `fclos`, `ret` and `argvalues`; nothing is done when `fclos` or `ret` is missing
+   */
+  triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info)
+    const { fclos, ret, argvalues } = info
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return
+    const name = node.callee?.property?.name || node.callee?.name
+    // 1. Record route info for Rule, URLSpec, url (Recording phase)
+    const isRuleCall = isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec') || name === 'url'
+    if (isRuleCall && argvalues && argvalues.length >= 2) {
+      const pArg = argvalues[0]
+      const path = (pArg && tornadoPathMap.get(pArg)) || pArg?.value
+      const handler = argvalues[1]
+      tornadoRouteMap.set(ret, { path, handler })
+    }
+    // 2. Record path for PathMatches
+    if (isTornadoCall(node, 'PathMatches') && argvalues && argvalues.length >= 1) {
+      const path = argvalues[0]?.value
+      if (typeof path === 'string') {
+        tornadoPathMap.set(ret, path)
+      }
+    }
+    // 3. Record internal routes for Application/RuleRouter instances
+    const isInit = ['__init__', '_CTOR_'].includes(name)
+    if (isInit && argvalues && argvalues.length >= 2) {
+      const self = argvalues[0]
+      const routes = argvalues[1]
+      // Heuristic: if routes looks like a list/tuple of routes
+      const isRouteList =
+        routes && (routes.vtype === 'object' || routes.vtype === 'symbol' || Array.isArray(routes.value))
+      if (isRouteList && self) {
+        tornadoRoutesMap.set(self, routes)
+      }
+    }
+    const isApp = isTornadoCall(node, 'Application')
+    const isRouter = isTornadoCall(node, 'RuleRouter')
+    if (!isInit && (isApp || isRouter) && argvalues?.[0]) {
+      tornadoRoutesMap.set(ret, argvalues[0])
+    }
+    if (tornadoSourceAPIs.has(name)) {
+      markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' })
+    }
+  }
+
+  /**
+   * Marks the result of a `self.request.<attr>` access (as matched by isRequestAttributeAccess) as PYTHON_INPUT.
+   * @param analyzer
+   * @param scope
+   * @param node member-access AST node
+   * @param state
+   * @param info supplies `res`, the resolved member value; nothing is marked when it is missing
+   */
+  triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    if (Config.entryPointMode !== 'ONLY_CUSTOM' && info?.res && isRequestAttributeAccess(node)) {
+      markTaintSource(info.res, { path: node, kind: 'PYTHON_INPUT' })
+    }
+  }
+}
+
+export = TornadoTaintChecker
diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts
new file mode 100644
index 00000000..618a49a3
--- /dev/null
+++ b/src/checker/taint/python/tornado-util.ts
@@ -0,0 +1,80 @@
+/**
+ * Names of RequestHandler getter methods whose return value is treated as user input
+ */
+export const tornadoSourceAPIs = new Set([
+  'get_argument',
+  'get_query_argument',
+  'get_body_argument',
+  'get_query_arguments',
+  'get_body_arguments',
+  'get_cookie',
+  'get_secure_cookie',
+  'get_arguments',
+  'get_json_body',
+])
+
+/**
+ * Detect if node is an access of the form `self.request.<attr>` where <attr> is one of the listed request fields
+ * @param node AST node; anything that is not a MemberAccess yields false
+ */
+export function isRequestAttributeAccess(node: any): boolean {
+  if (node?.type !== 'MemberAccess') return false
+  const inner = node.object
+  return (
+    inner?.type === 'MemberAccess' &&
+    inner.object?.type === 'Identifier' &&
+    inner.object?.name === 'self' &&
+    inner.property?.name === 'request' &&
+    [
+      'body',
+      'query',
+      'headers',
+      'cookies',
+      'files',
+      'uri',
+      'path',
+      'arguments',
+      'remote_ip',
+      'host',
+      'query_arguments',
+      'body_arguments',
+    ].includes(node.property?.name)
+  )
+}
+
+/**
+ * Check if a call's function name or receiver name equals targetName (for ctor calls, any name in the receiver chain)
+ * @param node AST node; anything that is not a CallExpression (or has no callee) yields false
+ * @param targetName name to look for, e.g. 'Application', 'RuleRouter', 'add_handlers'
+ */
+export function isTornadoCall(node: any, targetName: string): boolean {
+  if (!node || node.type !== 'CallExpression') return false
+  const { callee } = node
+  const funcName = callee?.property?.name || callee?.name
+  const objectName = callee?.object?.name || callee?.object?.property?.name
+  if (funcName === targetName || objectName === targetName) {
+    return true
+  }
+  if (['__init__', '_CTOR_'].includes(funcName)) {
+    let current = callee?.object
+    while (current) {
+      const currentName = current.name || current.property?.name
+      if (currentName === targetName) return true
+      current = current.object || current.callee
+    }
+  }
+  return false
+}
+
+/**
+ * Extract capture-group info from a URL regex: named groups if any, otherwise the count of unnamed capturing groups
+ * @param pattern URL regex source; escape sequences and [...] character classes are ignored when looking for groups
+ */
+export function extractTornadoParams(pattern: string): { named: string[]; positionalCount: number } {
+  if (!pattern) return { named: [], positionalCount: 0 }
+  const cleaned = pattern.replace(/\\[\s\S]/g, '').replace(/\[[^\]]*\]/g, '')
+  const named = Array.from(cleaned.matchAll(/\(\?P<(\w+)>/g)).map((m) => m[1])
+  if (named.length > 0) return { named, positionalCount: 0 }
+  const positionalCount = (cleaned.match(/\((?!\?)/g) || []).length
+  return { named: [], positionalCount }
+}
diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts
index 427ad6df..47833e07 100644
--- a/src/engine/analyzer/python/common/python-analyzer.ts
+++ b/src/engine/analyzer/python/common/python-analyzer.ts
@@ -561,7 +561,11 @@ class PythonAnalyzer extends (Analyzer as any) {
       resolved_prop.name = '_CTOR_'
     }
     if (!resolved_prop) return defscope
-    return this.getMemberValue(defscope, resolved_prop, state)
+    const res = this.getMemberValue(defscope, resolved_prop, state)
+    if (this.checkerManager && (this.checkerManager as any).checkAtMemberAccess) {
+      this.checkerManager.checkAtMemberAccess(this, defscope, node, state, { res })
+    }
+    return res
   }
 
   /**