github · MathiasVP · Nov 10, 2023 · Nov 8, 2023 · Nov 8, 2023 · Nov 9, 2023
@@ -15,9 +15,10 @@
  */
 
 import semmle.code.cpp.security.BufferWrite
-import semmle.code.cpp.security.Security
-import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl
-import TaintedWithPath
+import semmle.code.cpp.security.FlowSources as FS
+import semmle.code.cpp.dataflow.new.TaintTracking
+import semmle.code.cpp.controlflow.IRGuards
+import Flow::PathGraph
 
 /*
  * --- Summary of CWE-120 alerts ---
@@ -47,15 +48,6 @@ predicate isUnboundedWrite(BufferWrite bw) {
   not exists(bw.getMaxData(_)) // and we can't deduce an upper bound to the amount copied
 }
 
-/*
- * predicate isMaybeUnboundedWrite(BufferWrite bw)
- * {
- *   not bw.hasExplicitLimit()                           // has no explicit size limit
- *   and exists(bw.getMaxData())                         // and we can deduce an upper bound to the amount copied
- *   and (not exists(getBufferSize(bw.getDest(), _)))    // but we can't work out the size of the destination to be sure
- * }
- */
-
 /**
  * Holds if `e` is a source buffer going into an unbounded write `bw` or a
  * qualifier of (a qualifier of ...) such a source.
@@ -66,19 +58,43 @@ predicate unboundedWriteSource(Expr e, BufferWrite bw) {
   exists(FieldAccess fa | unboundedWriteSource(fa, bw) and e = fa.getQualifier())
 }
 
-/*
- * --- user input reach ---
- */
+predicate isSource(FS::FlowSource source, string sourceType) { source.getSourceType() = sourceType }
+
+predicate isSink(DataFlow::Node sink, BufferWrite bw) {
+  unboundedWriteSource(sink.asIndirectExpr(), bw)
+  or
+  // `gets` and `scanf` reads from stdin so there's no real input.
+  // The `BufferWrite` library models this as the call itself being
+  // the source. In this case we mark the output argument as being
+  // the sink so that we report a path where source = sink (because
+  // the same output argument is also included in `isSource`).
+  bw.getASource() = bw and
+  unboundedWriteSource(sink.asDefiningArgument(), bw)
+}
+
+predicate lessThanOrEqual(IRGuardCondition g, Expr e, boolean branch) {
+  exists(Operand left |
+    g.comparesLt(left, _, _, true, branch) or
+    g.comparesEq(left, _, _, true, branch)
+  |
+    left.getDef().getUnconvertedResultExpression() = e
+  )
+}
+
+module Config implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node source) { isSource(source, _) }
 
-class Configuration extends TaintTrackingConfiguration {
-  override predicate isSink(Element tainted) { unboundedWriteSource(tainted, _) }
+  predicate isSink(DataFlow::Node sink) { isSink(sink, _) }
 
-  override predicate taintThroughGlobals() { any() }
+  predicate isBarrierOut(DataFlow::Node node) { isSink(node) }
+
+  predicate isBarrier(DataFlow::Node node) {
+    // Block flow if the node is guarded by any <, <= or = operations.
+    node = DataFlow::BarrierGuard<lessThanOrEqual/3>::getABarrierNode()
+  }
 }
 
-/*
- * --- put it together ---
- */
+module Flow = TaintTracking::Global<Config>;
 
 /*
  * An unbounded write is, for example `strcpy(..., tainted)`. We're looking
@@ -87,17 +103,20 @@ class Configuration extends TaintTrackingConfiguration {
  *
  * In the case of `gets` and `scanf`, where the source buffer is implicit, the
  * `BufferWrite` library reports the source buffer to be the same as the
- * destination buffer. Since those destination-buffer arguments are also
- * modeled in the taint-tracking library as being _sources_ of taint, they are
- * in practice reported as being tainted because the `security.TaintTracking`
- * library does not distinguish between taint going into an argument and out of
- * an argument. Thus, we get the desired alerts.
+ * destination buffer. So to report an alert on a pattern like:
+ * ```
+ * char s[32];
+ * gets(s);
+ * ```
+ * we define the sink as the node corresponding to the output argument of `gets`.
+ * This gives us a path where the source is equal to the sink.
  */
 
-from BufferWrite bw, Expr inputSource, Expr tainted, PathNode sourceNode, PathNode sinkNode
+from BufferWrite bw, Flow::PathNode source, Flow::PathNode sink, string sourceType
 where
-  taintedWithPath(inputSource, tainted, sourceNode, sinkNode) and
-  unboundedWriteSource(tainted, bw)
-select bw, sourceNode, sinkNode,
-  "This '" + bw.getBWDesc() + "' with input from $@ may overflow the destination.", inputSource,
-  inputSource.toString()
+  Flow::flowPath(source, sink) and
+  isSource(source.getNode(), sourceType) and
+  isSink(sink.getNode(), bw)
+select bw, source, sink,
+  "This '" + bw.getBWDesc() + "' with input from $@ may overflow the destination.",
+  source.getNode(), sourceType
@@ -1,37 +1,18 @@
 edges
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-subpaths
+| tests.c:16:26:16:29 | argv indirection | tests.c:28:22:28:28 | access to array indirection |
+| tests.c:16:26:16:29 | argv indirection | tests.c:29:28:29:34 | access to array indirection |
+| tests.c:16:26:16:29 | argv indirection | tests.c:34:10:34:16 | access to array indirection |
 nodes
-| tests.c:28:22:28:25 | argv | semmle.label | argv |
-| tests.c:28:22:28:25 | argv | semmle.label | argv |
-| tests.c:28:22:28:28 | access to array | semmle.label | access to array |
-| tests.c:28:22:28:28 | access to array | semmle.label | access to array |
-| tests.c:29:28:29:31 | argv | semmle.label | argv |
-| tests.c:29:28:29:31 | argv | semmle.label | argv |
-| tests.c:29:28:29:34 | access to array | semmle.label | access to array |
-| tests.c:29:28:29:34 | access to array | semmle.label | access to array |
-| tests.c:31:15:31:23 | buffer100 | semmle.label | buffer100 |
-| tests.c:31:15:31:23 | buffer100 | semmle.label | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | semmle.label | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | semmle.label | buffer100 |
-| tests.c:34:10:34:13 | argv | semmle.label | argv |
-| tests.c:34:10:34:13 | argv | semmle.label | argv |
-| tests.c:34:10:34:16 | access to array | semmle.label | access to array |
-| tests.c:34:10:34:16 | access to array | semmle.label | access to array |
+| tests.c:16:26:16:29 | argv indirection | semmle.label | argv indirection |
+| tests.c:28:22:28:28 | access to array indirection | semmle.label | access to array indirection |
+| tests.c:29:28:29:34 | access to array indirection | semmle.label | access to array indirection |
+| tests.c:31:15:31:23 | scanf output argument | semmle.label | scanf output argument |
+| tests.c:33:21:33:29 | scanf output argument | semmle.label | scanf output argument |
+| tests.c:34:10:34:16 | access to array indirection | semmle.label | access to array indirection |
+subpaths
 #select
-| tests.c:28:3:28:9 | call to sprintf | tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:28:22:28:25 | argv | argv |
-| tests.c:29:3:29:9 | call to sprintf | tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:29:28:29:31 | argv | argv |
-| tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | buffer100 | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:31:15:31:23 | buffer100 | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | buffer100 | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:33:21:33:29 | buffer100 | buffer100 |
-| tests.c:34:25:34:33 | buffer100 | tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array | This 'sscanf string argument' with input from $@ may overflow the destination. | tests.c:34:10:34:13 | argv | argv |
+| tests.c:28:3:28:9 | call to sprintf | tests.c:16:26:16:29 | argv indirection | tests.c:28:22:28:28 | access to array indirection | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |
+| tests.c:29:3:29:9 | call to sprintf | tests.c:16:26:16:29 | argv indirection | tests.c:29:28:29:34 | access to array indirection | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |
+| tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | scanf output argument | tests.c:31:15:31:23 | scanf output argument | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:31:15:31:23 | scanf output argument | value read by scanf |
+| tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | scanf output argument | tests.c:33:21:33:29 | scanf output argument | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:33:21:33:29 | scanf output argument | value read by scanf |
+| tests.c:34:25:34:33 | buffer100 | tests.c:16:26:16:29 | argv indirection | tests.c:34:10:34:16 | access to array indirection | This 'sscanf string argument' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |