Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 171 additions & 76 deletions ddtrace/appsec/_iast/_stacktrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ static __thread int in_stacktrace = 0;
#include <Python.h>
#include <frameobject.h>
#include <patchlevel.h>
#include <stdbool.h>

#ifdef _WIN32
#define DD_TRACE_INSTALLED_PREFIX "\\ddtrace\\"
Expand All @@ -25,6 +26,7 @@ static __thread int in_stacktrace = 0;
#define GET_LINENO(frame) PyFrame_GetLineNumber((PyFrameObject*)frame)
#define GET_FRAME(tstate) PyThreadState_GetFrame(tstate)
#define GET_PREVIOUS(frame) PyFrame_GetBack(frame)
#define FRAME_INCREF(frame) Py_INCREF((PyObject*)frame)
#define FRAME_DECREF(frame) Py_DecRef((PyObject*)frame)
#define FRAME_XDECREF(frame) Py_XDECREF((PyObject*)frame)
#define FILENAME_DECREF(filename) Py_DecRef(filename)
Expand Down Expand Up @@ -68,6 +70,7 @@ GET_FUNCTION(PyFrameObject* frame)
#define GET_FRAME(tstate) tstate->frame
#define GET_PREVIOUS(frame) frame->f_back
#define GET_FILENAME(frame) ((PyObject*)(frame->f_code->co_filename))
#define FRAME_INCREF(frame)
#define FRAME_DECREF(frame)
#define FRAME_XDECREF(frame)
#define FILENAME_DECREF(filename)
Expand All @@ -88,6 +91,14 @@ GET_FUNCTION(PyFrameObject* frame)
#endif
#endif

// Python standard library path (sysconfig "stdlib"), or NULL when it could not
// be resolved. Set once at module initialisation.
static char* STDLIB_PATH = NULL;
// Cached strlen(STDLIB_PATH). Kept as size_t because it is assigned from
// strlen() and used as the count argument of strncmp().
static size_t STDLIB_PATH_LEN = 0;

// Python site-packages path (sysconfig "purelib"), or NULL when it could not
// be resolved. Set once at module initialisation.
static char* PURELIB_PATH = NULL;
// Cached strlen(PURELIB_PATH); see STDLIB_PATH_LEN for the choice of type.
static size_t PURELIB_PATH_LEN = 0;

static inline PyObject*
SAFE_GET_LOCALS(PyFrameObject* frame)
{
Expand Down Expand Up @@ -121,115 +132,191 @@ GET_CLASS(PyFrameObject* frame)
}

/**
* get_file_and_line
*
* Get the filename, line number, function name and class name of the original wrapped
* function to report it.
*
* Returns a tuple:
* (filename, line_number, function name, class name)
**/
static PyObject*
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
* Checks if the filename is special.
* For example, a frozen module (`<frozen 'os'>`), a template (`<template>`), etc.
*/
static inline bool
_is_special_frame(const char* filename)
{
    // Special (non-file) code sources are named with a leading '<', e.g.
    // "<frozen os>" or "<template>". A NULL filename is never special.
    // This is a pure predicate: it must not write the in_stacktrace flag,
    // which is set and cleared by the function capturing the stack trace.
    return filename != NULL && filename[0] == '<';
}

PyThreadState* tstate = PyThreadState_Get();
if (!tstate) {
goto exit_0;
}
/**
 * Checks whether the filename belongs to the installed ddtrace package.
 * Paths that also contain the tests marker are not considered ddtrace files.
 * A NULL filename yields false.
 */
static inline bool
_is_ddtrace_filename(const char* filename)
{
    if (filename == NULL) {
        return false;
    }
    if (strstr(filename, DD_TRACE_INSTALLED_PREFIX) == NULL) {
        // Not located under the ddtrace installation directory.
        return false;
    }
    // Inside ddtrace: still excluded when the path points into the tests tree.
    return strstr(filename, TESTS_PREFIX) == NULL;
}

int line;
PyObject* filename_o = NULL;
PyObject* result = NULL;
PyObject* cwd_bytes = NULL;
char* cwd = NULL;
/**
 * Checks whether the filename starts with the cached site-packages (purelib) path.
 * Yields false when either the filename or the cached path is NULL.
 */
static inline bool
_is_site_packages_filename(const char* filename)
{
    if (filename == NULL || PURELIB_PATH == NULL) {
        return false;
    }
    // Plain prefix comparison against the first PURELIB_PATH_LEN characters.
    return strncmp(filename, PURELIB_PATH, PURELIB_PATH_LEN) == 0;
}

/**
 * Checks whether the filename lives in the Python standard library directory.
 * Yields false when either the filename or the cached stdlib path is NULL.
 */
static inline bool
_is_stdlib_filename(const char* filename)
{
    if (filename == NULL || STDLIB_PATH == NULL) {
        return false;
    }
    // site-packages is often a subdirectory of the stdlib directory, so a
    // stdlib path is one prefixed by stdlib and not prefixed by purelib.
    // TODO: As of Python 3.10, we could use sys.stdlib_module_names.
    if (_is_site_packages_filename(filename)) {
        return false;
    }
    return strncmp(filename, STDLIB_PATH, STDLIB_PATH_LEN) == 0;
}

if (!PyUnicode_FSConverter(cwd_obj, &cwd_bytes)) {
goto exit_0;
/**
 * Resolves a path by calling Python's `sysconfig.get_path(name)`.
 *
 * Returns a heap-allocated copy of the path (made with strdup), or NULL when
 * the module import, the call, the UTF-8 conversion or the copy fails.
 *
 * This lookup is best-effort: callers treat NULL as "path unknown". For that
 * reason any Python exception raised along the way is cleared before
 * returning NULL, so that no error indicator is left pending.
 */
static char*
get_sysconfig_path(const char* name)
{
    char* res = NULL;
    const char* path_str = NULL;
    PyObject* path = NULL;
    PyObject* sysconfig_mod = PyImport_ImportModule("sysconfig");
    if (!sysconfig_mod) {
        goto done;
    }

    path = PyObject_CallMethod(sysconfig_mod, "get_path", "s", name);
    if (!path) {
        goto done;
    }

    // The returned buffer belongs to `path`, so copy it before releasing
    // `path`. The conversion fails (with an exception set) if get_path did
    // not return a str.
    path_str = PyUnicode_AsUTF8(path);
    if (path_str) {
        res = strdup(path_str);
    }

done:
    Py_XDECREF(path);
    Py_XDECREF(sysconfig_mod);
    if (!res) {
        // Do not leak a pending exception out of a best-effort lookup.
        PyErr_Clear();
    }
    return res;
}

/**
* Gets a reference to a PyFrameObject and walks up the stack until a relevant frame is found.
*
* Returns a new reference to the PyFrameObject.
*
* The caller is not responsible for DECREF'ing the given PyFrameObject, but it is responsible for
* DECREF'ing the returned PyFrameObject.
*/
static PyFrameObject*
_find_relevant_frame(PyFrameObject* frame, bool allow_site_packages)
{
// NOTE(review): this span interleaves statements that reference names not
// declared in this function (cwd, result, line, line_obj, func_name,
// class_name, and the exit / exit_0 labels). They look like lines of the
// previous get_file_and_line implementation shown side by side by a diff
// view - confirm against the real file before relying on them.
//
// Walk from `frame` towards its callers until a frame passes the filters below.
while (NULL != frame) {
filename_o = GET_FILENAME(frame);
PyObject* filename_o = GET_FILENAME(frame);
if (!filename_o) {
goto exit;
// No filename available: release the frame we were given and give up.
FRAME_DECREF(frame);
return NULL;
}
// NOTE(review): filename may be NULL if the UTF-8 conversion fails. The
// _is_* helpers return false for NULL, so such a frame would be treated as
// relevant - confirm this is intended.
const char* filename = PyUnicode_AsUTF8(filename_o);
if (((strstr(filename, DD_TRACE_INSTALLED_PREFIX) != NULL && strstr(filename, TESTS_PREFIX) == NULL)) ||
(strstr(filename, SITE_PACKAGES_PREFIX) != NULL || strstr(filename, cwd) == NULL)) {
// Skip special ("<...>"), ddtrace and stdlib frames; site-packages frames
// are skipped too unless allow_site_packages is true.
if (_is_special_frame(filename) || _is_ddtrace_filename(filename) || _is_stdlib_filename(filename) ||
(!allow_site_packages && _is_site_packages_filename(filename))) {
// Fetch the caller's frame before releasing the current frame and its filename.
PyFrameObject* prev_frame = GET_PREVIOUS(frame);
FRAME_DECREF(frame);
FILENAME_DECREF(filename_o);
frame = prev_frame;
continue;
}
/*
frame->f_lineno will not always return the correct line number
you need to call PyCode_Addr2Line().
*/
line = GET_LINENO(frame);
PyObject* line_obj = Py_BuildValue("i", line);
if (!line_obj) {
goto exit;
}
PyObject* func_name = GET_FUNCTION(frame);
if (!func_name) {
Py_DecRef(line_obj);
goto exit;
}
PyObject* class_name = GET_CLASS(frame);
if (!class_name) {
Py_DecRef(line_obj);
Py_DecRef(func_name);
goto exit;
}
result = PyTuple_Pack(4, filename_o, line_obj, func_name, class_name);
Py_DecRef(func_name);
Py_DecRef(class_name);
Py_DecRef(line_obj);
// The frame passed every filter: release the filename and stop walking.
FILENAME_DECREF(filename_o);
break;
}
if (result == NULL) {
goto exit_0;
// Either the relevant frame, or NULL when the walk ran past the outermost frame.
return frame;
}

// Builds the (filename, line number, function name, class name) tuple for `frame`.
// Returns the value produced by PyTuple_Pack, or NULL when any of the four
// components could not be obtained.
//
// NOTE(review): the `exit:` label with Py_DecRef(cwd_bytes) / FRAME_XDECREF(frame)
// and the `in_stacktrace = 0;` statement below reference state that this function
// does not declare or own. They look like lines of the previous implementation
// shown side by side by a diff view - confirm against the real file.
static PyObject*
_get_result_tuple(PyFrameObject* frame)
{
// Everything starts as NULL so the shared cleanup at `error:` can release
// whatever was obtained, on success and on failure alike.
PyObject* result = NULL;
PyObject* filename_o = NULL;
PyObject* line_o = NULL;
PyObject* funcname_o = NULL;
PyObject* classname_o = NULL;

filename_o = GET_FILENAME(frame);
if (!filename_o) {
goto error;
}

exit:
Py_DecRef(cwd_bytes);
FRAME_XDECREF(frame);
// frame->f_lineno will not always return the correct line number
// you need to call PyCode_Addr2Line().
int line = GET_LINENO(frame);
line_o = Py_BuildValue("i", line);
if (!line_o) {
goto error;
}
funcname_o = GET_FUNCTION(frame);
if (!funcname_o) {
goto error;
}
classname_o = GET_CLASS(frame);
if (!classname_o) {
goto error;
}
result = PyTuple_Pack(4, filename_o, line_o, funcname_o, classname_o);

// Reached on every path; `result` is still NULL if a step above failed.
error:
FILENAME_XDECREF(filename_o);
in_stacktrace = 0;
Py_XDECREF(line_o);
Py_XDECREF(funcname_o);
Py_XDECREF(classname_o);
return result;
}

/**
 * get_file_and_line
 *
 * Get the filename, line number, function name and class name of the original wrapped
 * function to report it.
 *
 * Frame selection: special ("<...>"), ddtrace and stdlib frames are always skipped.
 * Among the remaining frames, the first one outside site-packages is preferred; if
 * there is none, the first remaining frame (possibly inside site-packages) is used.
 *
 * Returns a tuple:
 *     (filename, line_number, function name, class name)
 * When no frame can be reported, returns the sentinel tuple ("", -1, "", "").
 * Returns NULL only when a Python exception is pending.
 **/
static PyObject*
get_file_and_line(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args))
{
    // Mark that we are now capturing a stack trace to avoid reentrant calls on GET_LOCALS
    in_stacktrace = 1;
    PyFrameObject* frame = NULL;
    PyFrameObject* backup_frame = NULL;
    PyObject* result = NULL;
    PyThreadState* tstate = PyThreadState_Get();
    if (!tstate) {
        goto exit;
    }

    frame = GET_FRAME(tstate);
    if (!frame) {
        goto exit;
    }

    // Skip all frames until the first non-ddtrace and non-stdlib frame.
    // Store that frame as backup (if any). If there is no better frame, fallback to this.
    // This happens, for example, when the vulnerability is in a package installed in site-packages.
    frame = _find_relevant_frame(frame, true);
    if (NULL == frame) {
        goto exit;
    }
    // The next search consumes the reference held in `frame`, so the backup
    // needs a reference of its own.
    backup_frame = frame;
    FRAME_INCREF(backup_frame);

    // Continue skipping until we find a frame that is both non-ddtrace and non-site-packages.
    frame = _find_relevant_frame(frame, false);
    if (NULL == frame) {
        // Nothing better found: hand the backup's reference over to `frame`.
        frame = backup_frame;
        backup_frame = NULL;
    } else {
        FRAME_DECREF(backup_frame);
    }

    result = _get_result_tuple(frame);

exit:
    FRAME_XDECREF(frame);
    if (result == NULL && !PyErr_Occurred()) {
        // Returning NULL without an exception set is an error for a C function
        // called from Python. Report "no location" with the sentinel tuple
        // ("", -1, "", "") instead.
        result = Py_BuildValue("(siss)", "", -1, "", "");
    }
    in_stacktrace = 0;
    return result;
}

// Method table of this extension module.
// get_info_frame is registered with METH_NOARGS because get_file_and_line
// ignores its second parameter. Each entry must provide exactly four fields:
// name, function, flags, docstring.
static PyMethodDef StacktraceMethods[] = {
    { "get_info_frame",
      (PyCFunction)get_file_and_line,
      METH_NOARGS,
      "Stacktrace function: returns (filename, line, method, class)" },
    { NULL, NULL, 0, NULL } // end-of-table sentinel
};

Expand All @@ -245,5 +332,13 @@ PyInit__stacktrace(void)
PyObject* m = PyModule_Create(&stacktrace);
if (m == NULL)
return NULL;
STDLIB_PATH = get_sysconfig_path("stdlib");
if (STDLIB_PATH) {
STDLIB_PATH_LEN = strlen(STDLIB_PATH);
}
PURELIB_PATH = get_sysconfig_path("purelib");
if (PURELIB_PATH) {
PURELIB_PATH_LEN = strlen(PURELIB_PATH);
}
return m;
}
2 changes: 1 addition & 1 deletion ddtrace/appsec/_iast/_stacktrace.pyi
Original file line number Diff line number Diff line change
@@ -1 +1 @@
def get_info_frame(cwd_obj): ...
def get_info_frame(): ...
23 changes: 19 additions & 4 deletions ddtrace/appsec/_iast/taint_sinks/_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import sysconfig
from typing import Any
from typing import Callable
from typing import Optional
Expand Down Expand Up @@ -30,6 +31,9 @@

TEXT_TYPES = Union[str, bytes, bytearray]

PURELIB_PATH = sysconfig.get_path("purelib")
STDLIB_PATH = sysconfig.get_path("stdlib")


class taint_sink_deduplication(deduplication):
def _check_deduplication(self):
Expand Down Expand Up @@ -130,21 +134,32 @@ def _prepare_report(
# Resolves the code location (file, line, function, class) to attach to a report.
# Every early exit returns four None values.
# NOTE(review): this span shows both `get_info_frame(CWD)` and `get_info_frame()`,
# and both the inline CWD relativization and `cls._rel_path(...)`. It looks like
# old and new lines of a diff side by side - confirm against the real file.
@classmethod
def _compute_file_line(cls) -> Tuple[Optional[str], Optional[int], Optional[str], Optional[str]]:
file_name = line_number = function_name = class_name = None

frame_info = get_info_frame(CWD)
frame_info = get_info_frame()
# An empty result, or one whose first element is "" or -1, means no usable frame.
if not frame_info or frame_info[0] in ("", -1):
return file_name, line_number, function_name, class_name

file_name, line_number, function_name, class_name = frame_info

if file_name.startswith(CWD):
file_name = os.path.relpath(file_name, start=CWD)
# _rel_path returns "" when the path is under none of its known roots;
# such locations are logged and dropped.
file_name = cls._rel_path(file_name)
if not file_name:
log.debug("Could not relativize vulnerability location path: %s", frame_info[0])
return None, None, None, None

# Drop the location when is_not_reported() is falsy for this file and line.
if not cls.is_not_reported(file_name, line_number):
return None, None, None, None

return file_name, line_number, function_name, class_name

@staticmethod
def _rel_path(file_name: str) -> str:
    """Return ``file_name`` relative to the first known root it starts with.

    Roots are tried in this order: site-packages (``PURELIB_PATH``), the
    standard library (``STDLIB_PATH``), then the working directory (``CWD``).
    Returns an empty string when the path starts with none of them.
    """
    for root in (PURELIB_PATH, STDLIB_PATH, CWD):
        if file_name.startswith(root):
            return os.path.relpath(file_name, start=root)
    return ""

@classmethod
def _create_evidence_and_report(
cls,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Code Security: IAST: Avoid excessive filtering of stacktrace locations when finding vulnerabilities. After this change, vulnerabilities that were previously discarded will now be reported, in particular those found within code in site-packages or outside of the working directory.
4 changes: 3 additions & 1 deletion tests/appsec/iast/taint_sinks/test_weak_hash.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from contextlib import contextmanager
import os
import sys
from unittest import mock

Expand Down Expand Up @@ -95,9 +96,10 @@ def test_weak_hash_hashlib(iast_context_defaults, hash_func, method):
],
)
def test_ensure_line_reported_is_minus_one_for_edge_cases(iast_context_defaults, hash_func, method, fake_line):
absolute_path = os.path.abspath(WEAK_ALGOS_FIXTURES_PATH)
with mock.patch(
"ddtrace.appsec._iast.taint_sinks._base.get_info_frame",
return_value=(WEAK_ALGOS_FIXTURES_PATH, fake_line, "", ""),
return_value=(absolute_path, fake_line, "", ""),
):
parametrized_weak_hash(hash_func, method)

Expand Down
Loading
Loading