Skip to content

Commit 12a280c

Browse files
committed
drm/dp_mst: Add topology ref history tracking for debugging
For very subtle mistakes with topology refs, it can be rather difficult to track them down with the debugging info that we already have. I had one such issue recently while trying to implement suspend/resume reprobing for MST, and ended up coming up with this. Inspired by Chris Wilson's wakeref tracking for i915, this adds a very similar feature to the DP MST helpers, which allows for partial tracking of topology refs for both ports and branch devices. This is a lot less advanced than wakeref tracking: we merely keep a count of all of the spots where a topology ref has been grabbed or dropped, then dump out that history in chronological order when a port or branch device's topology refcount reaches 0. So far, I've found this incredibly useful for debugging topology refcount errors. Since this has the potential to be somewhat slow and loud, we add an expert kernel config option to enable or disable this feature, CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS. Changes since v1: * Don't forget to destroy topology_ref_history_lock Changes since v4: * Correct order of kref_put()/topology_ref_history_unlock - we can't unlock the history after kref_put() since the memory might have been freed by that point * Don't print message on allocation error failures, the kernel already does this for us Changes since v5: * Get rid of some leftover usages of %px * Remove a leftover empty return; statement Cc: Juston Li <[email protected]> Cc: Imre Deak <[email protected]> Cc: Ville Syrjälä <[email protected]> Cc: Harry Wentland <[email protected]> Cc: Daniel Vetter <[email protected]> Reviewed-by: Sean Paul <[email protected]> Signed-off-by: Lyude Paul <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 6f85f73 commit 12a280c

File tree

3 files changed

+285
-8
lines changed

3 files changed

+285
-8
lines changed

drivers/gpu/drm/Kconfig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,20 @@ config DRM_KMS_FB_HELPER
9393
help
9494
FBDEV helpers for KMS drivers.
9595

96+
config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
97+
bool "Enable refcount backtrace history in the DP MST helpers"
98+
select STACKDEPOT
99+
depends on DRM_KMS_HELPER
100+
depends on DEBUG_KERNEL
101+
depends on EXPERT
102+
help
103+
Enables debug tracing for topology refs in DRM's DP MST helpers. A
104+
history of each topology reference/dereference will be printed to the
105+
kernel log once a port or branch device's topology refcount reaches 0.
106+
107+
This has the potential to use a lot of memory and print some very
108+
large kernel messages. If in doubt, say "N".
109+
96110
config DRM_FBDEV_EMULATION
97111
bool "Enable legacy fbdev support for your modesetting driver"
98112
depends on DRM

drivers/gpu/drm/drm_dp_mst_topology.c

Lines changed: 226 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@
2828
#include <linux/sched.h>
2929
#include <linux/seq_file.h>
3030

31+
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
32+
#include <linux/stackdepot.h>
33+
#include <linux/sort.h>
34+
#include <linux/timekeeping.h>
35+
#include <linux/math64.h>
36+
#endif
37+
3138
#include <drm/drm_atomic.h>
3239
#include <drm/drm_atomic_helper.h>
3340
#include <drm/drm_dp_mst_helper.h>
@@ -1399,12 +1406,184 @@ drm_dp_mst_put_port_malloc(struct drm_dp_mst_port *port)
13991406
}
14001407
EXPORT_SYMBOL(drm_dp_mst_put_port_malloc);
14011408

1409+
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
1410+
1411+
#define STACK_DEPTH 8
1412+
1413+
static noinline void
1414+
__topology_ref_save(struct drm_dp_mst_topology_mgr *mgr,
1415+
struct drm_dp_mst_topology_ref_history *history,
1416+
enum drm_dp_mst_topology_ref_type type)
1417+
{
1418+
struct drm_dp_mst_topology_ref_entry *entry = NULL;
1419+
depot_stack_handle_t backtrace;
1420+
ulong stack_entries[STACK_DEPTH];
1421+
uint n;
1422+
int i;
1423+
1424+
n = stack_trace_save(stack_entries, ARRAY_SIZE(stack_entries), 1);
1425+
backtrace = stack_depot_save(stack_entries, n, GFP_KERNEL);
1426+
if (!backtrace)
1427+
return;
1428+
1429+
/* Try to find an existing entry for this backtrace */
1430+
for (i = 0; i < history->len; i++) {
1431+
if (history->entries[i].backtrace == backtrace) {
1432+
entry = &history->entries[i];
1433+
break;
1434+
}
1435+
}
1436+
1437+
/* Otherwise add one */
1438+
if (!entry) {
1439+
struct drm_dp_mst_topology_ref_entry *new;
1440+
int new_len = history->len + 1;
1441+
1442+
new = krealloc(history->entries, sizeof(*new) * new_len,
1443+
GFP_KERNEL);
1444+
if (!new)
1445+
return;
1446+
1447+
entry = &new[history->len];
1448+
history->len = new_len;
1449+
history->entries = new;
1450+
1451+
entry->backtrace = backtrace;
1452+
entry->type = type;
1453+
entry->count = 0;
1454+
}
1455+
entry->count++;
1456+
entry->ts_nsec = ktime_get_ns();
1457+
}
1458+
1459+
static int
1460+
topology_ref_history_cmp(const void *a, const void *b)
1461+
{
1462+
const struct drm_dp_mst_topology_ref_entry *entry_a = a, *entry_b = b;
1463+
1464+
if (entry_a->ts_nsec > entry_b->ts_nsec)
1465+
return 1;
1466+
else if (entry_a->ts_nsec < entry_b->ts_nsec)
1467+
return -1;
1468+
else
1469+
return 0;
1470+
}
1471+
1472+
static inline const char *
1473+
topology_ref_type_to_str(enum drm_dp_mst_topology_ref_type type)
1474+
{
1475+
if (type == DRM_DP_MST_TOPOLOGY_REF_GET)
1476+
return "get";
1477+
else
1478+
return "put";
1479+
}
1480+
1481+
static void
1482+
__dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history,
1483+
void *ptr, const char *type_str)
1484+
{
1485+
struct drm_printer p = drm_debug_printer(DBG_PREFIX);
1486+
char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
1487+
int i;
1488+
1489+
if (!buf)
1490+
return;
1491+
1492+
if (!history->len)
1493+
goto out;
1494+
1495+
/* First, sort the list so that it goes from oldest to newest
1496+
* reference entry
1497+
*/
1498+
sort(history->entries, history->len, sizeof(*history->entries),
1499+
topology_ref_history_cmp, NULL);
1500+
1501+
drm_printf(&p, "%s (%p) topology count reached 0, dumping history:\n",
1502+
type_str, ptr);
1503+
1504+
for (i = 0; i < history->len; i++) {
1505+
const struct drm_dp_mst_topology_ref_entry *entry =
1506+
&history->entries[i];
1507+
ulong *entries;
1508+
uint nr_entries;
1509+
u64 ts_nsec = entry->ts_nsec;
1510+
u64 rem_nsec = do_div(ts_nsec, 1000000000);
1511+
1512+
nr_entries = stack_depot_fetch(entry->backtrace, &entries);
1513+
stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
1514+
1515+
drm_printf(&p, " %d %ss (last at %5llu.%06llu):\n%s",
1516+
entry->count,
1517+
topology_ref_type_to_str(entry->type),
1518+
ts_nsec, rem_nsec / 1000, buf);
1519+
}
1520+
1521+
/* Now free the history, since this is the only time we expose it */
1522+
kfree(history->entries);
1523+
out:
1524+
kfree(buf);
1525+
}
1526+
1527+
static __always_inline void
1528+
drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb)
1529+
{
1530+
__dump_topology_ref_history(&mstb->topology_ref_history, mstb,
1531+
"MSTB");
1532+
}
1533+
1534+
static __always_inline void
1535+
drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port)
1536+
{
1537+
__dump_topology_ref_history(&port->topology_ref_history, port,
1538+
"Port");
1539+
}
1540+
1541+
static __always_inline void
1542+
save_mstb_topology_ref(struct drm_dp_mst_branch *mstb,
1543+
enum drm_dp_mst_topology_ref_type type)
1544+
{
1545+
__topology_ref_save(mstb->mgr, &mstb->topology_ref_history, type);
1546+
}
1547+
1548+
static __always_inline void
1549+
save_port_topology_ref(struct drm_dp_mst_port *port,
1550+
enum drm_dp_mst_topology_ref_type type)
1551+
{
1552+
__topology_ref_save(port->mgr, &port->topology_ref_history, type);
1553+
}
1554+
1555+
static inline void
1556+
topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr)
1557+
{
1558+
mutex_lock(&mgr->topology_ref_history_lock);
1559+
}
1560+
1561+
static inline void
1562+
topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr)
1563+
{
1564+
mutex_unlock(&mgr->topology_ref_history_lock);
1565+
}
1566+
#else
1567+
/*
 * History tracking is compiled out: every hook below is a no-op, so the
 * callers need no conditional compilation of their own.
 */
static inline void
topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr)
{
}

static inline void
topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr)
{
}

static inline void
drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb)
{
}

static inline void
drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port)
{
}

/* Statement-like no-ops; the arguments are never evaluated */
#define save_mstb_topology_ref(mstb, type) do { } while (0)
#define save_port_topology_ref(port, type) do { } while (0)
1577+
#endif
1578+
14021579
static void drm_dp_destroy_mst_branch_device(struct kref *kref)
14031580
{
14041581
struct drm_dp_mst_branch *mstb =
14051582
container_of(kref, struct drm_dp_mst_branch, topology_kref);
14061583
struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
14071584

1585+
drm_dp_mst_dump_mstb_topology_history(mstb);
1586+
14081587
INIT_LIST_HEAD(&mstb->destroy_next);
14091588

14101589
/*
@@ -1442,11 +1621,17 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
14421621
static int __must_check
14431622
drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb)
14441623
{
1445-
int ret = kref_get_unless_zero(&mstb->topology_kref);
1624+
int ret;
14461625

1447-
if (ret)
1448-
DRM_DEBUG("mstb %p (%d)\n", mstb,
1449-
kref_read(&mstb->topology_kref));
1626+
topology_ref_history_lock(mstb->mgr);
1627+
ret = kref_get_unless_zero(&mstb->topology_kref);
1628+
if (ret) {
1629+
DRM_DEBUG("mstb %p (%d)\n",
1630+
mstb, kref_read(&mstb->topology_kref));
1631+
save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET);
1632+
}
1633+
1634+
topology_ref_history_unlock(mstb->mgr);
14501635

14511636
return ret;
14521637
}
@@ -1467,9 +1652,14 @@ drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb)
14671652
*/
14681653
static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb)
14691654
{
1655+
topology_ref_history_lock(mstb->mgr);
1656+
1657+
save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET);
14701658
WARN_ON(kref_read(&mstb->topology_kref) == 0);
14711659
kref_get(&mstb->topology_kref);
14721660
DRM_DEBUG("mstb %p (%d)\n", mstb, kref_read(&mstb->topology_kref));
1661+
1662+
topology_ref_history_unlock(mstb->mgr);
14731663
}
14741664

14751665
/**
@@ -1487,8 +1677,13 @@ static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb)
14871677
static void
14881678
drm_dp_mst_topology_put_mstb(struct drm_dp_mst_branch *mstb)
14891679
{
1680+
topology_ref_history_lock(mstb->mgr);
1681+
14901682
DRM_DEBUG("mstb %p (%d)\n",
14911683
mstb, kref_read(&mstb->topology_kref) - 1);
1684+
save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_PUT);
1685+
1686+
topology_ref_history_unlock(mstb->mgr);
14921687
kref_put(&mstb->topology_kref, drm_dp_destroy_mst_branch_device);
14931688
}
14941689

@@ -1498,6 +1693,8 @@ static void drm_dp_destroy_port(struct kref *kref)
14981693
container_of(kref, struct drm_dp_mst_port, topology_kref);
14991694
struct drm_dp_mst_topology_mgr *mgr = port->mgr;
15001695

1696+
drm_dp_mst_dump_port_topology_history(port);
1697+
15011698
/* There's nothing that needs locking to destroy an input port yet */
15021699
if (port->input) {
15031700
drm_dp_mst_put_port_malloc(port);
@@ -1541,12 +1738,17 @@ static void drm_dp_destroy_port(struct kref *kref)
15411738
static int __must_check
15421739
drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port)
15431740
{
1544-
int ret = kref_get_unless_zero(&port->topology_kref);
1741+
int ret;
15451742

1546-
if (ret)
1547-
DRM_DEBUG("port %p (%d)\n", port,
1548-
kref_read(&port->topology_kref));
1743+
topology_ref_history_lock(port->mgr);
1744+
ret = kref_get_unless_zero(&port->topology_kref);
1745+
if (ret) {
1746+
DRM_DEBUG("port %p (%d)\n",
1747+
port, kref_read(&port->topology_kref));
1748+
save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET);
1749+
}
15491750

1751+
topology_ref_history_unlock(port->mgr);
15501752
return ret;
15511753
}
15521754

@@ -1565,9 +1767,14 @@ drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port)
15651767
*/
15661768
static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port)
15671769
{
1770+
topology_ref_history_lock(port->mgr);
1771+
15681772
WARN_ON(kref_read(&port->topology_kref) == 0);
15691773
kref_get(&port->topology_kref);
15701774
DRM_DEBUG("port %p (%d)\n", port, kref_read(&port->topology_kref));
1775+
save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET);
1776+
1777+
topology_ref_history_unlock(port->mgr);
15711778
}
15721779

15731780
/**
@@ -1583,8 +1790,13 @@ static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port)
15831790
*/
15841791
static void drm_dp_mst_topology_put_port(struct drm_dp_mst_port *port)
15851792
{
1793+
topology_ref_history_lock(port->mgr);
1794+
15861795
DRM_DEBUG("port %p (%d)\n",
15871796
port, kref_read(&port->topology_kref) - 1);
1797+
save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_PUT);
1798+
1799+
topology_ref_history_unlock(port->mgr);
15881800
kref_put(&port->topology_kref, drm_dp_destroy_port);
15891801
}
15901802

@@ -4578,6 +4790,9 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
45784790
mutex_init(&mgr->delayed_destroy_lock);
45794791
mutex_init(&mgr->up_req_lock);
45804792
mutex_init(&mgr->probe_lock);
4793+
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
4794+
mutex_init(&mgr->topology_ref_history_lock);
4795+
#endif
45814796
INIT_LIST_HEAD(&mgr->tx_msg_downq);
45824797
INIT_LIST_HEAD(&mgr->destroy_port_list);
45834798
INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
@@ -4644,6 +4859,9 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
46444859
mutex_destroy(&mgr->lock);
46454860
mutex_destroy(&mgr->up_req_lock);
46464861
mutex_destroy(&mgr->probe_lock);
4862+
#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
4863+
mutex_destroy(&mgr->topology_ref_history_lock);
4864+
#endif
46474865
}
46484866
EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy);
46494867

0 commit comments

Comments
 (0)