Skip to content

Commit f2c630e

Browse files
committed
fix(iast): avoid excessive filtering of stacktrace locations
1 parent e55ca19 commit f2c630e

File tree

8 files changed

+201
-210
lines changed

8 files changed

+201
-210
lines changed

ddtrace/appsec/_iast/_stacktrace.c

Lines changed: 171 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ static __thread int in_stacktrace = 0;
99
#include <Python.h>
1010
#include <frameobject.h>
1111
#include <patchlevel.h>
12+
#include <stdbool.h>
1213

1314
#ifdef _WIN32
1415
#define DD_TRACE_INSTALLED_PREFIX "\\ddtrace\\"
@@ -25,6 +26,7 @@ static __thread int in_stacktrace = 0;
2526
#define GET_LINENO(frame) PyFrame_GetLineNumber((PyFrameObject*)frame)
2627
#define GET_FRAME(tstate) PyThreadState_GetFrame(tstate)
2728
#define GET_PREVIOUS(frame) PyFrame_GetBack(frame)
29+
#define FRAME_INCREF(frame) Py_INCREF((PyObject*)frame)
2830
#define FRAME_DECREF(frame) Py_DecRef((PyObject*)frame)
2931
#define FRAME_XDECREF(frame) Py_XDECREF((PyObject*)frame)
3032
#define FILENAME_DECREF(filename) Py_DecRef(filename)
@@ -68,6 +70,7 @@ GET_FUNCTION(PyFrameObject* frame)
6870
#define GET_FRAME(tstate) tstate->frame
6971
#define GET_PREVIOUS(frame) frame->f_back
7072
#define GET_FILENAME(frame) ((PyObject*)(frame->f_code->co_filename))
73+
#define FRAME_INCREF(frame)
7174
#define FRAME_DECREF(frame)
7275
#define FRAME_XDECREF(frame)
7376
#define FILENAME_DECREF(filename)
@@ -88,6 +91,14 @@ GET_FUNCTION(PyFrameObject* frame)
8891
#endif
8992
#endif
9093

94+
// Python standard library path
95+
static char* STDLIB_PATH = NULL;
96+
static ssize_t STDLIB_PATH_LEN = 0;
97+
98+
// Python site-packages path
99+
static char* PURELIB_PATH = NULL;
100+
static ssize_t PURELIB_PATH_LEN = 0;
101+
91102
static inline PyObject*
92103
SAFE_GET_LOCALS(PyFrameObject* frame)
93104
{
@@ -121,115 +132,191 @@ GET_CLASS(PyFrameObject* frame)
121132
}
122133

123134
/**
124-
* get_file_and_line
125-
*
126-
* Get the filename, line number, function name and class name of the original wrapped
127-
* function to report it.
128-
*
129-
* Returns a tuple:
130-
* (filename, line_number, function name, class name)
131-
**/
132-
static PyObject*
133-
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
135+
* Checks if the filename is special.
136+
* For example, a frozen module (`<frozen 'os'>`), a template (`<template>`), etc.
137+
*/
138+
static inline bool
139+
_is_special_frame(const char* filename)
134140
{
135-
// Mark that we are now capturing a stack trace to avoid reentrant calls on GET_LOCALS
136-
in_stacktrace = 1;
141+
return filename && strncmp(filename, "<", strlen("<")) == 0;
142+
}
137143

138-
PyThreadState* tstate = PyThreadState_Get();
139-
if (!tstate) {
140-
goto exit_0;
141-
}
144+
static inline bool
145+
_is_ddtrace_filename(const char* filename)
146+
{
147+
return filename && strstr(filename, DD_TRACE_INSTALLED_PREFIX) != NULL && strstr(filename, TESTS_PREFIX) == NULL;
148+
}
142149

143-
int line;
144-
PyObject* filename_o = NULL;
145-
PyObject* result = NULL;
146-
PyObject* cwd_bytes = NULL;
147-
char* cwd = NULL;
150+
static inline bool
151+
_is_site_packages_filename(const char* filename)
152+
{
153+
const bool res = filename && PURELIB_PATH && strncmp(filename, PURELIB_PATH, PURELIB_PATH_LEN) == 0;
154+
return res;
155+
}
156+
157+
static inline bool
158+
_is_stdlib_filename(const char* filename)
159+
{
160+
// site-packages is often a subdirectory of stdlib directory, so stdlib
161+
// path is defined as prefixed by stdlib and not prefixed by purelib.
162+
// TODO: As of Python 3.10, we could use sys.stdlib_module_names.
163+
const bool res = filename && STDLIB_PATH && !_is_site_packages_filename(filename) &&
164+
strncmp(filename, STDLIB_PATH, STDLIB_PATH_LEN) == 0;
165+
return res;
166+
}
148167

149-
if (!PyUnicode_FSConverter(cwd_obj, &cwd_bytes)) {
150-
goto exit_0;
168+
static char*
169+
get_sysconfig_path(const char* name)
170+
{
171+
PyObject* sysconfig_mod = PyImport_ImportModule("sysconfig");
172+
if (!sysconfig_mod) {
173+
return NULL;
151174
}
152-
cwd = PyBytes_AsString(cwd_bytes);
153-
if (!cwd) {
154-
goto exit_0;
175+
176+
PyObject* path = PyObject_CallMethod(sysconfig_mod, "get_path", "s", name);
177+
if (!path) {
178+
Py_DECREF(sysconfig_mod);
179+
return NULL;
155180
}
156181

157-
PyFrameObject* frame = GET_FRAME(tstate);
158-
if (!frame) {
159-
goto exit_0;
182+
const char* path_str = PyUnicode_AsUTF8(path);
183+
char* res = NULL;
184+
if (path_str) {
185+
res = strdup(path_str);
160186
}
187+
Py_DECREF(path);
188+
Py_DECREF(sysconfig_mod);
189+
return res;
190+
}
161191

192+
/**
193+
* Gets a reference to a PyFrameObject and walks up the stack until a relevant frame is found.
194+
*
195+
* Returns a new reference to the PyFrameObject.
196+
*
197+
* The caller is not responsible for DECREF'ing the given PyFrameObject, but it is responsible for
198+
* DECREF'ing the returned PyFrameObject.
199+
*/
200+
static PyFrameObject*
201+
_find_relevant_frame(PyFrameObject* frame, bool allow_site_packages)
202+
{
162203
while (NULL != frame) {
163-
filename_o = GET_FILENAME(frame);
204+
PyObject* filename_o = GET_FILENAME(frame);
164205
if (!filename_o) {
165-
goto exit;
206+
FRAME_DECREF(frame);
207+
return NULL;
166208
}
167209
const char* filename = PyUnicode_AsUTF8(filename_o);
168-
if (((strstr(filename, DD_TRACE_INSTALLED_PREFIX) != NULL && strstr(filename, TESTS_PREFIX) == NULL)) ||
169-
(strstr(filename, SITE_PACKAGES_PREFIX) != NULL || strstr(filename, cwd) == NULL)) {
210+
if (_is_special_frame(filename) || _is_ddtrace_filename(filename) || _is_stdlib_filename(filename) ||
211+
(!allow_site_packages && _is_site_packages_filename(filename))) {
170212
PyFrameObject* prev_frame = GET_PREVIOUS(frame);
171213
FRAME_DECREF(frame);
172214
FILENAME_DECREF(filename_o);
173215
frame = prev_frame;
174216
continue;
175217
}
176-
/*
177-
frame->f_lineno will not always return the correct line number
178-
you need to call PyCode_Addr2Line().
179-
*/
180-
line = GET_LINENO(frame);
181-
PyObject* line_obj = Py_BuildValue("i", line);
182-
if (!line_obj) {
183-
goto exit;
184-
}
185-
PyObject* func_name = GET_FUNCTION(frame);
186-
if (!func_name) {
187-
Py_DecRef(line_obj);
188-
goto exit;
189-
}
190-
PyObject* class_name = GET_CLASS(frame);
191-
if (!class_name) {
192-
Py_DecRef(line_obj);
193-
Py_DecRef(func_name);
194-
goto exit;
195-
}
196-
result = PyTuple_Pack(4, filename_o, line_obj, func_name, class_name);
197-
Py_DecRef(func_name);
198-
Py_DecRef(class_name);
199-
Py_DecRef(line_obj);
218+
FILENAME_DECREF(filename_o);
200219
break;
201220
}
202-
if (result == NULL) {
203-
goto exit_0;
221+
return frame;
222+
}
223+
224+
static PyObject*
225+
_get_result_tuple(PyFrameObject* frame)
226+
{
227+
PyObject* result = NULL;
228+
PyObject* filename_o = NULL;
229+
PyObject* line_o = NULL;
230+
PyObject* funcname_o = NULL;
231+
PyObject* classname_o = NULL;
232+
233+
filename_o = GET_FILENAME(frame);
234+
if (!filename_o) {
235+
goto error;
204236
}
205237

206-
exit:
207-
Py_DecRef(cwd_bytes);
208-
FRAME_XDECREF(frame);
238+
// frame->f_lineno will not always return the correct line number
239+
// you need to call PyCode_Addr2Line().
240+
int line = GET_LINENO(frame);
241+
line_o = Py_BuildValue("i", line);
242+
if (!line_o) {
243+
goto error;
244+
}
245+
funcname_o = GET_FUNCTION(frame);
246+
if (!funcname_o) {
247+
goto error;
248+
}
249+
classname_o = GET_CLASS(frame);
250+
if (!classname_o) {
251+
goto error;
252+
}
253+
result = PyTuple_Pack(4, filename_o, line_o, funcname_o, classname_o);
254+
255+
error:
209256
FILENAME_XDECREF(filename_o);
210-
in_stacktrace = 0;
257+
Py_XDECREF(line_o);
258+
Py_XDECREF(funcname_o);
259+
Py_XDECREF(classname_o);
211260
return result;
261+
}
262+
263+
/**
264+
* get_file_and_line
265+
*
266+
* Get the filename, line number, function name and class name of the original wrapped
267+
* function to report it.
268+
*
269+
* Returns a tuple:
270+
* (filename, line_number, function name, class name)
271+
**/
272+
static PyObject*
273+
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args))
274+
{
275+
// Mark that we are now capturing a stack trace to avoid reentrant calls on GET_LOCALS
276+
in_stacktrace = 1;
277+
PyFrameObject* frame = NULL;
278+
PyFrameObject* backup_frame = NULL;
279+
PyObject* result = NULL;
280+
PyThreadState* tstate = PyThreadState_Get();
281+
if (!tstate) {
282+
goto exit;
283+
}
212284

213-
exit_0:; /* Label must be followed by a statement */
214-
// Return "", -1, "", ""
215-
PyObject* line_obj = Py_BuildValue("i", -1);
216-
filename_o = PyUnicode_FromString("");
217-
PyObject* func_name = PyUnicode_FromString("");
218-
PyObject* class_name = PyUnicode_FromString("");
219-
result = PyTuple_Pack(4, filename_o, line_obj, func_name, class_name);
220-
Py_DecRef(cwd_bytes);
285+
frame = GET_FRAME(tstate);
286+
if (!frame) {
287+
goto exit;
288+
}
289+
290+
// Skip all frames until the first non-ddtrace and non-stdlib frame.
291+
// Store that frame as backup (if any). If there is no better frame, fallback to this.
292+
// This happens, for example, when the vulnerability is in a package installed in site-packages.
293+
frame = _find_relevant_frame(frame, true);
294+
if (NULL == frame) {
295+
goto exit;
296+
}
297+
backup_frame = frame;
298+
FRAME_INCREF(backup_frame);
299+
300+
// Continue skipping until we find a frame that is both non-ddtrace and non-site-packages.
301+
frame = _find_relevant_frame(frame, false);
302+
if (NULL == frame) {
303+
frame = backup_frame;
304+
backup_frame = NULL;
305+
} else {
306+
FRAME_DECREF(backup_frame);
307+
}
308+
309+
result = _get_result_tuple(frame);
310+
311+
exit:
221312
FRAME_XDECREF(frame);
222-
FILENAME_XDECREF(filename_o);
223-
Py_DecRef(line_obj);
224-
Py_DecRef(func_name);
225-
Py_DecRef(class_name);
226313
in_stacktrace = 0;
227314
return result;
228315
}
229316

230317
static PyMethodDef StacktraceMethods[] = { { "get_info_frame",
231318
(PyCFunction)get_file_and_line,
232-
METH_O,
319+
METH_NOARGS,
233320
"Stacktrace function: returns (filename, line, method, class)" },
234321
{ NULL, NULL, 0, NULL } };
235322

@@ -245,5 +332,13 @@ PyInit__stacktrace(void)
245332
PyObject* m = PyModule_Create(&stacktrace);
246333
if (m == NULL)
247334
return NULL;
335+
STDLIB_PATH = get_sysconfig_path("stdlib");
336+
if (STDLIB_PATH) {
337+
STDLIB_PATH_LEN = strlen(STDLIB_PATH);
338+
}
339+
PURELIB_PATH = get_sysconfig_path("purelib");
340+
if (PURELIB_PATH) {
341+
PURELIB_PATH_LEN = strlen(PURELIB_PATH);
342+
}
248343
return m;
249344
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
def get_info_frame(cwd_obj): ...
1+
def get_info_frame(): ...

ddtrace/appsec/_iast/taint_sinks/_base.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import sysconfig
23
from typing import Any
34
from typing import Callable
45
from typing import Optional
@@ -30,6 +31,9 @@
3031

3132
TEXT_TYPES = Union[str, bytes, bytearray]
3233

34+
PURELIB_PATH = sysconfig.get_path("purelib")
35+
STDLIB_PATH = sysconfig.get_path("stdlib")
36+
3337

3438
class taint_sink_deduplication(deduplication):
3539
def _check_deduplication(self):
@@ -130,21 +134,32 @@ def _prepare_report(
130134
@classmethod
131135
def _compute_file_line(cls) -> Tuple[Optional[str], Optional[int], Optional[str], Optional[str]]:
132136
file_name = line_number = function_name = class_name = None
133-
134-
frame_info = get_info_frame(CWD)
137+
frame_info = get_info_frame()
135138
if not frame_info or frame_info[0] in ("", -1):
136139
return file_name, line_number, function_name, class_name
137140

138141
file_name, line_number, function_name, class_name = frame_info
139142

140-
if file_name.startswith(CWD):
141-
file_name = os.path.relpath(file_name, start=CWD)
143+
file_name = cls._rel_path(file_name)
144+
if not file_name:
145+
log.debug("Could not relativize vulnerability location path: %s", frame_info[0])
146+
return None, None, None, None
142147

143148
if not cls.is_not_reported(file_name, line_number):
144149
return None, None, None, None
145150

146151
return file_name, line_number, function_name, class_name
147152

153+
@staticmethod
154+
def _rel_path(file_name: str) -> str:
155+
if file_name.startswith(PURELIB_PATH):
156+
return os.path.relpath(file_name, start=PURELIB_PATH)
157+
if file_name.startswith(STDLIB_PATH):
158+
return os.path.relpath(file_name, start=STDLIB_PATH)
159+
if file_name.startswith(CWD):
160+
return os.path.relpath(file_name, start=CWD)
161+
return ""
162+
148163
@classmethod
149164
def _create_evidence_and_report(
150165
cls,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Code Security: IAST: Avoid excessive filtering of stacktrace locations when finding vulnerabilities. After this change, vulnerabilities that were previously discarded will now be reported. In particular, if they were found within code in site-packages or outside of the working directory.

tests/appsec/iast/taint_sinks/test_weak_hash.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from contextlib import contextmanager
2+
import os
23
import sys
34
from unittest import mock
45

@@ -95,9 +96,10 @@ def test_weak_hash_hashlib(iast_context_defaults, hash_func, method):
9596
],
9697
)
9798
def test_ensure_line_reported_is_minus_one_for_edge_cases(iast_context_defaults, hash_func, method, fake_line):
99+
absolute_path = os.path.abspath(WEAK_ALGOS_FIXTURES_PATH)
98100
with mock.patch(
99101
"ddtrace.appsec._iast.taint_sinks._base.get_info_frame",
100-
return_value=(WEAK_ALGOS_FIXTURES_PATH, fake_line, "", ""),
102+
return_value=(absolute_path, fake_line, "", ""),
101103
):
102104
parametrized_weak_hash(hash_func, method)
103105

0 commit comments

Comments
 (0)