|
1 | 1 | /** |
2 | 2 | * @name Non-constant format string |
3 | | - * @description Passing a non-constant 'format' string to a printf-like function can lead |
| 3 | + * @description Passing a value that is not a string literal 'format' string to a printf-like function can lead |
4 | 4 | * to a mismatch between the number of arguments defined by the 'format' and the number |
5 | 5 | * of arguments actually passed to the function. If the format string ultimately stems |
6 | 6 | * from an untrusted source, this can be used for exploits. |
| 7 | + * This query finds format strings coming from non-literal sources. Note that format strings of |
| 8 | + * type `const char*` it is still considered non-constant if the value is not coming from a string |
| 9 | + * literal. For example, for a parameter with type `const char*` of an exported function that is |
| 10 | + * used as a format string, there is no way to ensure the originating value was a string literal. |
7 | 11 | * @kind problem |
8 | 12 | * @problem.severity recommendation |
9 | 13 | * @security-severity 9.3 |
|
16 | 20 | */ |
17 | 21 |
|
18 | 22 | import semmle.code.cpp.ir.dataflow.TaintTracking |
19 | | -import semmle.code.cpp.models.implementations.GetText |
20 | 23 | import semmle.code.cpp.commons.Printf |
| 24 | +import semmle.code.cpp.security.FlowSources |
| 25 | +import semmle.code.cpp.ir.dataflow.internal.ModelUtil |
| 26 | +import semmle.code.cpp.models.interfaces.DataFlow |
| 27 | +import semmle.code.cpp.models.interfaces.Taint |
| 28 | +import semmle.code.cpp.ir.IR |
21 | 29 |
|
22 | | -// For the following `...gettext` functions, we assume that |
23 | | -// all translations preserve the type and order of `%` specifiers |
24 | | -// (and hence are safe to use as format strings). This |
25 | | -// assumption is hard-coded into the query. |
26 | | -predicate whitelistFunction(Function f, int arg) { |
27 | | - // basic variations of gettext |
28 | | - f.getName() = "_" and arg = 0 |
29 | | - or |
30 | | - exists(FunctionInput input | |
31 | | - f.(GetTextFunction).hasDataFlow(input, _) and |
32 | | - input.isParameterDeref(arg) |
33 | | - ) |
34 | | -} |
35 | | - |
36 | | -// we assume that ALL uses of the `_` macro (and calls to `gettext`) |
37 | | -// return constant string literals |
38 | | -predicate underscoreMacro(Expr e) { |
39 | | - exists(MacroInvocation mi | |
40 | | - mi.getMacroName() = "_" and |
41 | | - mi.getExpr() = e |
42 | | - ) |
43 | | - or |
44 | | - e = any(GetTextFunction gettext).getACallToThisFunction() |
| 30 | +class UncalledFunction extends Function { |
| 31 | + UncalledFunction() { |
| 32 | + not exists(Call c | c.getTarget() = this) and |
| 33 | + // Ignore functions that appear to be function pointers |
| 34 | + // function pointers may be seen as uncalled statically |
| 35 | + not exists(FunctionAccess fa | fa.getTarget() = this) |
| 36 | + } |
45 | 37 | } |
46 | 38 |
|
47 | 39 | /** |
48 | | - * Holds if `t` cannot hold a character array, directly or indirectly. |
| 40 | + * Holds if `node` is a non-constant source of data flow for non-const format string detection. |
| 41 | + * This is defined as either: |
| 42 | + * 1) a `FlowSource` |
| 43 | + * 2) a parameter of an 'uncalled' function |
| 44 | + * 3) an argument to a function with no definition that is not known to define the output through its input |
| 45 | + * 4) an out arg of a function with no definition that is not known to define the output through its input |
| 46 | + * |
| 47 | + * The latter two cases address identifying standard string manipulation libraries as input sources |
| 48 | + * e.g., strcpy. More simply, functions without definitions that are known to manipulate the |
| 49 | + * input to produce an output are not sources. Instead the ultimate source of input to these functions |
| 50 | + * should be considered as the source. |
| 51 | + * |
| 52 | + * False Negative Implication: This approach has false negatives (fails to identify non-const sources) |
| 53 | + * when the source is a field of a struct or object and the initialization is not observed statically. |
| 54 | + * There are 3 general cases where this can occur: |
| 55 | + * 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string. |
| 56 | + * 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically. |
| 57 | + * e.g., an object constructor isn't known statically, or a function sets fields |
| 58 | + * of a struct, but the function is not known statically. |
| 59 | + * 3) A function meeting cases (3) and (4) above returns (through an out argument or return value) |
| 60 | + * a struct or object where a field containing a format string has been initialized. |
| 61 | + * |
| 62 | + * Note, uninitialized variables used as format strings are never detected by design. |
| 63 | + * Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query. |
49 | 64 | */ |
50 | | -predicate cannotContainString(Type t, boolean isIndirect) { |
51 | | - isIndirect = false and |
52 | | - exists(Type unspecified | |
53 | | - unspecified = t.getUnspecifiedType() and |
54 | | - not unspecified instanceof UnknownType |
55 | | - | |
56 | | - unspecified instanceof BuiltInType or |
57 | | - unspecified instanceof IntegralOrEnumType |
| 65 | +predicate isNonConst(DataFlow::Node node) { |
| 66 | + node instanceof FlowSource |
| 67 | + or |
| 68 | + // Parameters of uncalled functions that aren't const |
| 69 | + exists(UncalledFunction f, Parameter p | |
| 70 | + f.getAParameter() = p and |
| 71 | + p = node.asParameter() |
58 | 72 | ) |
59 | | -} |
60 | | - |
61 | | -predicate isNonConst(DataFlow::Node node, boolean isIndirect) { |
62 | | - exists(Expr e | |
63 | | - e = node.asExpr() and isIndirect = false |
64 | | - or |
65 | | - e = node.asIndirectExpr() and isIndirect = true |
66 | | - | |
67 | | - exists(FunctionCall fc | fc = e | |
68 | | - not ( |
69 | | - whitelistFunction(fc.getTarget(), _) or |
70 | | - fc.getTarget().hasDefinition() |
71 | | - ) |
72 | | - ) |
73 | | - or |
74 | | - exists(Parameter p | p = e.(VariableAccess).getTarget() | |
75 | | - p.getFunction().getName() = "main" and p.getType() instanceof PointerType |
| 73 | + or |
| 74 | + // Consider as an input any out arg of a function or a function's return where the function is not: |
| 75 | + // 1. a function with a known dataflow or taintflow from input to output and the `node` is the output |
| 76 | + // 2. a function where there is a known definition |
| 77 | + // i.e., functions that with unknown bodies and are not known to define the output through its input |
| 78 | + // are considered as possible non-const sources |
| 79 | + // The function's output must also not be const to be considered a non-const source |
| 80 | + exists(Function func, CallInstruction call | |
| 81 | + // NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two |
| 82 | + // variables representing the same call in ordoer to use `callOutput` below. |
| 83 | + exists(Expr arg | |
| 84 | + call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and |
| 85 | + arg = node.asDefiningArgument() |
76 | 86 | ) |
77 | 87 | or |
78 | | - e instanceof CrementOperation |
79 | | - or |
80 | | - e instanceof AddressOfExpr |
81 | | - or |
82 | | - e instanceof ReferenceToExpr |
83 | | - or |
84 | | - e instanceof AssignPointerAddExpr |
85 | | - or |
86 | | - e instanceof AssignPointerSubExpr |
87 | | - or |
88 | | - e instanceof PointerArithmeticOperation |
89 | | - or |
90 | | - e instanceof FieldAccess |
91 | | - or |
92 | | - e instanceof PointerDereferenceExpr |
93 | | - or |
94 | | - e instanceof AddressOfExpr |
95 | | - or |
96 | | - e instanceof ExprCall |
97 | | - or |
98 | | - e instanceof NewArrayExpr |
99 | | - or |
100 | | - exists(Variable v | v = e.(VariableAccess).getTarget() | |
101 | | - v.getType().(ArrayType).getBaseType() instanceof CharType and |
102 | | - exists(AssignExpr ae | |
103 | | - ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v |
104 | | - ) |
| 88 | + call.getUnconvertedResultExpression() = node.asIndirectExpr() |
| 89 | + | |
| 90 | + func = call.getStaticCallTarget() and |
| 91 | + not exists(FunctionOutput output | |
| 92 | + // NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf |
| 93 | + // variant function's output are now possible non-const sources |
| 94 | + pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or |
| 95 | + pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output) |
| 96 | + | |
| 97 | + node = callOutput(call, output) |
105 | 98 | ) |
| 99 | + ) and |
| 100 | + not exists(Call c | |
| 101 | + c.getTarget().hasDefinition() and |
| 102 | + if node instanceof DataFlow::DefinitionByReferenceNode |
| 103 | + then c.getAnArgument() = node.asDefiningArgument() |
| 104 | + else c = [node.asExpr(), node.asIndirectExpr()] |
106 | 105 | ) |
107 | | - or |
108 | | - node instanceof DataFlow::DefinitionByReferenceNode and |
109 | | - isIndirect = true |
110 | | -} |
111 | | - |
112 | | -pragma[noinline] |
113 | | -predicate isBarrierNode(DataFlow::Node node) { |
114 | | - underscoreMacro([node.asExpr(), node.asIndirectExpr()]) |
115 | | - or |
116 | | - exists(node.asExpr()) and |
117 | | - cannotContainString(node.getType(), false) |
118 | 106 | } |
119 | 107 |
|
| 108 | +/** |
| 109 | + * Holds if `sink` is a sink is a format string of any |
| 110 | + * `FormattingFunctionCall`. |
| 111 | + */ |
120 | 112 | predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { |
121 | 113 | [sink.asExpr(), sink.asIndirectExpr()] = formatString and |
122 | 114 | exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) |
123 | 115 | } |
124 | 116 |
|
125 | 117 | module NonConstFlowConfig implements DataFlow::ConfigSig { |
126 | | - predicate isSource(DataFlow::Node source) { |
127 | | - exists(boolean isIndirect, Type t | |
128 | | - isNonConst(source, isIndirect) and |
129 | | - t = source.getType() and |
130 | | - not cannotContainString(t, isIndirect) |
131 | | - ) |
132 | | - } |
| 118 | + predicate isSource(DataFlow::Node source) { isNonConst(source) } |
133 | 119 |
|
134 | 120 | predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) } |
135 | 121 |
|
136 | | - predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) } |
| 122 | + predicate isBarrier(DataFlow::Node node) { |
| 123 | + // Ignore tracing non-const through array indices |
| 124 | + exists(ArrayExpr a | a.getArrayOffset() = node.asExpr()) |
| 125 | + } |
137 | 126 | } |
138 | 127 |
|
139 | 128 | module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>; |
140 | 129 |
|
141 | | -from FormattingFunctionCall call, Expr formatString |
| 130 | +from FormattingFunctionCall call, Expr formatString, DataFlow::Node sink |
142 | 131 | where |
143 | 132 | call.getArgument(call.getFormatParameterIndex()) = formatString and |
144 | | - exists(DataFlow::Node sink | |
145 | | - NonConstFlow::flowTo(sink) and |
146 | | - isSinkImpl(sink, formatString) |
147 | | - ) |
| 133 | + NonConstFlow::flowTo(sink) and |
| 134 | + isSinkImpl(sink, formatString) |
148 | 135 | select formatString, |
149 | 136 | "The format string argument to " + call.getTarget().getName() + |
150 | 137 | " should be constant to prevent security issues and other potential errors." |
0 commit comments