diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql index de8c38795675..cba1a945ee23 100644 --- a/python/ql/src/Expressions/ContainsNonContainer.ql +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -12,25 +12,75 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.ApiGraphs -predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { - exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | +predicate rhs_in_expr(Expr rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs | op instanceof In or op instanceof NotIn ) } -from - ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin +/** + * Holds if `origin` is the result of applying a class as a decorator to a function. + * Such decorator classes act as proxies, and the runtime value of the decorated + * attribute may be of a different type than the decorator class itself. + */ +predicate isDecoratorApplication(DataFlow::LocalSourceNode origin) { + exists(FunctionExpr fe | origin.asExpr() = fe.getADecoratorCall()) +} + +/** + * Holds if `cls` has methods dynamically added via `setattr`, so we cannot + * statically determine its full interface. + */ +predicate hasDynamicMethods(Class cls) { + exists(CallNode setattr_call | + setattr_call.getFunction().(NameNode).getId() = "setattr" and + setattr_call.getArg(0).(NameNode).getId() = cls.getName() and + setattr_call.getScope() = cls.getScope() + ) +} + +/** + * Holds if `cls_arg` references a known container builtin type, either directly + * or as an element of a tuple. + */ +private predicate isContainerTypeArg(DataFlow::Node cls_arg) { + cls_arg = + API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"]) + .getAValueReachableFromSource() + or + isContainerTypeArg(DataFlow::exprNode(cls_arg.asExpr().(Tuple).getAnElt())) +} + +/** + * Holds if `rhs` is guarded by an `isinstance` check that tests for + * a container type. + */ +predicate guardedByIsinstanceContainer(DataFlow::Node rhs) { + exists( + DataFlow::GuardNode guard, DataFlow::CallCfgNode isinstance_call, DataFlow::LocalSourceNode src + | + isinstance_call = API::builtin("isinstance").getACall() and + src.flowsTo(isinstance_call.getArg(0)) and + src.flowsTo(rhs) and + isContainerTypeArg(isinstance_call.getArg(1)) and + guard = isinstance_call.asCfgNode() and + guard.controlsBlock(rhs.asCfgNode().getBasicBlock(), true) + ) +} + +from Compare cmp, DataFlow::LocalSourceNode origin, DataFlow::Node rhs, Class cls where - rhs_in_expr(non_seq, cmp) and - non_seq.pointsTo(_, v, origin) and - v.getClass() = cls and - not Types::failedInference(cls, _) and - not cls.hasAttribute("__contains__") and - not cls.hasAttribute("__iter__") and - not cls.hasAttribute("__getitem__") and - not cls = ClassValue::nonetype() and - not cls = Value::named("types.MappingProxyType") + origin = classInstanceTracker(cls) and + origin.flowsTo(rhs) and + not DuckTyping::isContainer(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and + not isDecoratorApplication(origin) and + not hasDynamicMethods(cls) and + not guardedByIsinstanceContainer(rhs) and + rhs_in_expr(rhs.asExpr(), cmp) select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, "target", cls, cls.getName() diff --git a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected index cf6d78d0d36b..132852c73f1c 100644 --- a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected +++ b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected @@ -1,2 +1,2 @@ -| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | -| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | +| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | diff --git a/python/ql/test/query-tests/Expressions/general/expressions_test.py b/python/ql/test/query-tests/Expressions/general/expressions_test.py index 5e07b58e2041..f9d522d328b3 100644 --- a/python/ql/test/query-tests/Expressions/general/expressions_test.py +++ b/python/ql/test/query-tests/Expressions/general/expressions_test.py @@ -279,3 +279,62 @@ def local(): def apply(f): pass apply(foo)([1]) + +# Class used as a decorator: the runtime value at attribute access is the +# function's return value, not the decorator class instance. +class cached_property(object): + def __init__(self, func): + self.func = func + def __get__(self, obj, cls): + val = self.func(obj) + setattr(obj, self.func.__name__, val) + return val + +class MyForm(object): + @cached_property + def changed_data(self): + return [1, 2, 3] + +def test_decorator_class(form): + f = MyForm() + # OK: cached_property is a descriptor; the actual runtime value is a list. + if "name" in f.changed_data: + pass + +# Class with dynamically added methods via setattr: we cannot statically +# determine its full interface, so we should not flag it. +class DynamicProxy(object): + def __init__(self, args): + self._args = args + +for method_name in ["__contains__", "__iter__", "__len__"]: + def wrapper(self, *args, __method_name=method_name): + pass + setattr(DynamicProxy, method_name, wrapper) + +def test_dynamic_methods(): + proxy = DynamicProxy(()) + # OK: __contains__ is added dynamically via setattr. + if "name" in proxy: + pass + +# isinstance guard should suppress non-container warning +def guarded_contains(x): + obj = XIter() + if isinstance(obj, dict): + if x in obj: # OK: guarded by isinstance + pass + +def guarded_contains_tuple(x): + obj = XIter() + if isinstance(obj, (list, dict, set)): + if x in obj: # OK: guarded by isinstance with tuple of types + pass + +# Negated isinstance guard: early return when NOT a container +def guarded_contains_negated(x): + obj = XIter() + if not isinstance(obj, dict): + return + if x in obj: # OK: guarded by negated isinstance + early return + pass