Skip to content

Commit d60c6a1

Browse files
committed
libdrgn: add register information to platform
In order to retrieve registers from stack traces, we need to know what registers are defined for a platform. This adds a small DSL for defining registers for an architecture. The DSL is parsed by an awk script that generates the necessary tables, lookup functions, and enum definitions.
1 parent b8c657d commit d60c6a1

File tree

18 files changed

+517
-22
lines changed

18 files changed

+517
-22
lines changed

docs/api_reference.rst

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ Platforms
424424

425425
.. attribute:: arch
426426

427-
The instruction set architecture of this platform.
427+
Instruction set architecture of this platform.
428428

429429
:vartype: Architecture
430430

@@ -434,6 +434,12 @@ Platforms
434434

435435
:vartype: PlatformFlags
436436

437+
.. attribute:: registers
438+
439+
Processor registers on this platform.
440+
441+
:vartype: list[Register]
442+
437443
.. class:: Architecture
438444

439445
``Architecture`` is an :class:`enum.Enum` of instruction set architectures.
@@ -461,6 +467,23 @@ Platforms
461467

462468
Platform is little-endian.
463469

470+
.. class:: Register
471+
472+
A ``Register`` represents information about a processor register.
473+
474+
.. attribute:: name
475+
476+
Name of this register.
477+
478+
:vartype: str
479+
480+
.. attribute:: number
481+
482+
Arbitrary number which uniquely identifies this register on its
483+
platform.
484+
485+
:vartype: int
486+
464487
.. attribute:: host_platform
465488

466489
The platform of the host which is running drgn.

drgn/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
Program,
5757
ProgramFlags,
5858
Qualifiers,
59+
Register,
5960
StackFrame,
6061
StackTrace,
6162
Symbol,
@@ -100,6 +101,7 @@
100101
'Program',
101102
'ProgramFlags',
102103
'Qualifiers',
104+
'Register',
103105
'StackFrame',
104106
'StackTrace',
105107
'Symbol',

libdrgn/Makefile.am

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,19 @@ ACLOCAL_AMFLAGS = -I m4
22

33
SUBDIRS = elfutils include
44

5+
.DELETE_ON_ERROR:
6+
57
AM_CPPFLAGS = -Iinclude -D_GNU_SOURCE
68

79
include_HEADERS = drgn.h
810

11+
BUILT_SOURCES = drgn.h
12+
913
noinst_LTLIBRARIES = libdrgnimpl.la
1014

11-
libdrgnimpl_la_SOURCES = arch_x86_64.c \
15+
ARCH_INS = arch_x86_64.c.in
16+
17+
libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \
1218
binary_search_tree.h \
1319
cityhash.h \
1420
dwarf_index.c \
@@ -63,6 +69,12 @@ libdrgnimpl_la_CFLAGS += $(libkdumpfile_CFLAGS)
6369
libdrgnimpl_la_LIBADD = $(libkdumpfile_LIBS)
6470
endif
6571

72+
arch_%.c: arch_%.c.in build-aux/gen_arch.awk build-aux/parse_arch.awk
73+
gawk -f $(word 3, $^) -f $(word 2, $^) $< > $@
74+
75+
drgn.h: build-aux/gen_drgn_h.awk build-aux/parse_arch.awk $(ARCH_INS) drgn.h.in
76+
gawk -f $(word 1, $^) -f $(word 2, $^) $(wordlist 3, $(words $^), $^) > $@
77+
6678
elfutils_LIBS = elfutils/libdw/libdw.a elfutils/libelf/libelf.a -lz -llzma -lbz2
6779

6880
lib_LTLIBRARIES = libdrgn.la
@@ -72,7 +84,7 @@ libdrgn_la_LDFLAGS = -version-info 0:0:0
7284
libdrgn_la_LIBADD = libdrgnimpl.la $(elfutils_LIBS)
7385

7486
if WITH_PYTHON
75-
BUILT_SOURCES = python/docstrings.h
87+
BUILT_SOURCES += python/docstrings.h
7688

7789
noinst_LTLIBRARIES += _drgn.la
7890
endif
@@ -105,16 +117,15 @@ if WITH_LIBKDUMPFILE
105117
_drgn_la_CFLAGS += $(libkdumpfile_CFLAGS)
106118
endif
107119

108-
GEN_CONSTANTS = $(top_srcdir)/build-aux/gen_constants.py
109-
GEN_DOCSTRINGS = $(top_srcdir)/build-aux/gen_docstrings.py
110-
111-
python/constants.c: drgn.h $(GEN_CONSTANTS)
112-
$(PYTHON) $(GEN_CONSTANTS) < $< > $@
120+
python/constants.c: drgn.h build-aux/gen_constants.py
121+
$(PYTHON) $(word 2, $^) < $< > $@
113122

114-
python/docstrings.c: ../docs/api_reference.rst $(GEN_DOCSTRINGS)
115-
$(PYTHON) $(GEN_DOCSTRINGS) < $< > $@
123+
python/docstrings.c: ../docs/api_reference.rst build-aux/gen_docstrings.py
124+
$(PYTHON) $(word 2, $^) < $< > $@
116125

117-
python/docstrings.h: ../docs/api_reference.rst $(GEN_DOCSTRINGS)
118-
$(PYTHON) $(GEN_DOCSTRINGS) -H < $< > $@
126+
python/docstrings.h: ../docs/api_reference.rst build-aux/gen_docstrings.py
127+
$(PYTHON) $(word 2, $^) -H < $< > $@
119128

120-
EXTRA_DIST = $(GEN_CONSTANTS) $(GEN_DOCSTRINGS)
129+
EXTRA_DIST = $(ARCH_INS) build-aux/gen_arch.awk build-aux/gen_constants.py \
130+
build-aux/gen_docstrings.py build-aux/gen_drgn_h.awk \
131+
build-aux/parse_arch.awk

libdrgn/arch_x86_64.c renamed to libdrgn/arch_x86_64.c.in

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,107 @@
1+
%{
12
// Copyright 2019 - Omar Sandoval
23
// SPDX-License-Identifier: GPL-3.0+
34

45
#include "internal.h"
56
#include "platform.h"
7+
%}
8+
9+
x86-64
10+
%%
11+
rax
12+
rdx
13+
rcx
14+
rbx
15+
rsi
16+
rdi
17+
rbp
18+
rsp
19+
r8
20+
r9
21+
r10
22+
r11
23+
r12
24+
r13
25+
r14
26+
r15
27+
# The System V ABI calls this the return address (RA) register, but it's
28+
# effectively the instruction pointer.
29+
rip
30+
xmm0
31+
xmm1
32+
xmm2
33+
xmm3
34+
xmm4
35+
xmm5
36+
xmm6
37+
xmm7
38+
xmm8
39+
xmm9
40+
xmm10
41+
xmm11
42+
xmm12
43+
xmm13
44+
xmm14
45+
xmm15
46+
st0
47+
st1
48+
st2
49+
st3
50+
st4
51+
st5
52+
st6
53+
st7
54+
mm0
55+
mm1
56+
mm2
57+
mm3
58+
mm4
59+
mm5
60+
mm6
61+
mm7
62+
rFLAGS
63+
es
64+
cs
65+
ss
66+
ds
67+
fs
68+
gs
69+
fs.base, 58
70+
gs.base
71+
tr, 62
72+
ldtr
73+
mxcsr
74+
fcw
75+
fsw
76+
xmm16
77+
xmm17
78+
xmm18
79+
xmm19
80+
xmm20
81+
xmm21
82+
xmm22
83+
xmm23
84+
xmm24
85+
xmm25
86+
xmm26
87+
xmm27
88+
xmm28
89+
xmm29
90+
xmm30
91+
xmm31
92+
k0, 118
93+
k1
94+
k2
95+
k3
96+
k4
97+
k5
98+
k6
99+
k7
100+
bnd0
101+
bnd1
102+
bnd2
103+
bnd3
104+
%%
6105

7106
static inline struct drgn_error *read_register(struct drgn_object *reg_obj,
8107
struct drgn_object *frame_obj,
@@ -102,8 +201,7 @@ linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread,
102201
}
103202

104203
const struct drgn_architecture_info arch_info_x86_64 = {
105-
.name = "x86-64",
106-
.arch = DRGN_ARCH_X86_64,
204+
ARCHITECTURE_INFO,
107205
.default_flags = (DRGN_PLATFORM_IS_64_BIT |
108206
DRGN_PLATFORM_IS_LITTLE_ENDIAN),
109207
.linux_kernel_set_initial_registers = linux_kernel_set_initial_registers_x86_64,

libdrgn/build-aux/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
*
22
!/.gitignore
3+
!/gen_arch.awk
34
!/gen_constants.py
45
!/gen_docstrings.py
6+
!/gen_drgn_h.awk
7+
!/parse_arch.awk
58
!/version.sh

libdrgn/build-aux/gen_arch.awk

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright 2019 - Omar Sandoval
2+
# SPDX-License-Identifier: GPL-3.0+
3+
4+
# This script generates "arch_foo.c" from "arch_foo.c.in". It uses
5+
# "parse_arch.awk" to parse the input file and generates three definitions:
6+
#
7+
# 1. An array of register definitions:
8+
# static const struct drgn_register registers[];
9+
#
10+
# 2. A lookup function (implemented as a trie using nested switch statements):
11+
# static const struct drgn_register *register_by_name(const char *name);
12+
#
13+
# 3. A macro containing initializers for the "name", "arch", "registers",
14+
# "num_registers", and "register_by_name" members of "struct
15+
# drgn_architecture_info":
16+
# #define ARCHITECTURE_INFO
17+
#
18+
# The prologue and epilogue are copied before and after these definitions,
19+
# respectively.
20+
21+
# Insert the string s into the trie rooted at the array node, storing value
# at the end of its path. Each character of s selects one level of nested
# subarray; the empty-string key of a node holds the value for the string
# that ends at that node. Callers pass three arguments: char is only a local
# variable (declared as an extra parameter, the usual awk idiom).
function add_to_trie(node, s, value, char) {
22+
if (length(s) == 0) {
23+
# Whole string consumed: record the value under the empty key.
node[""] = value
24+
} else {
25+
char = substr(s, 1, 1)
26+
if (!(char in node)) {
27+
# Force node[char] to be an array.
28+
node[char][""] = ""
29+
delete node[char][""]
30+
}
31+
# Descend one level and insert the rest of the string.
add_to_trie(node[char], substr(s, 2), value)
32+
}
33+
}
34+
35+
# Print C source for a "switch (*(p++))" statement that dispatches on the
# next character of the string p according to the trie node. Every output
# line is prefixed with indent. A child keyed by a character becomes a case
# label followed by a nested switch (printed recursively with one more tab);
# the empty-string key becomes "case '\0'" returning the address of the
# registers[] element whose index is stored there. Anything else falls to a
# "default" that returns NULL. char is a local variable, not an argument.
function trie_to_switch(node, indent, char) {
36+
print indent "switch (*(p++)) {"
37+
# gawk-specific: visit the keys of node in ascending string order, so the
# case labels are emitted sorted (the empty key sorts first).
PROCINFO["sorted_in"] = "@ind_str_asc"
38+
for (char in node) {
39+
if (length(char) == 0) {
40+
print indent "case '\\0':"
41+
print indent "\treturn &registers[" node[""] "];"
42+
} else {
43+
print indent "case '" char "':"
44+
trie_to_switch(node[char], "\t" indent)
45+
}
46+
}
47+
print indent "default:"
48+
print indent "\treturn NULL;"
49+
print indent "}"
50+
}
51+
52+
# Runs when the end of the input file is reached and prints the generated C
# file: a banner, the prologue, the registers[] table, the register_by_name()
# lookup function, the ARCHITECTURE_INFO macro, and finally the epilogue.
# NOTE(review): prologue, epilogue, arch_name, registers and sanitize() are
# not defined in this script; presumably they come from parse_arch.awk, which
# is loaded alongside it -- confirm there.
ENDFILE {
53+
print "/* Generated by libdrgn/build-aux/gen_arch.awk. */"
54+
55+
if (length(prologue) != 0)
56+
print prologue
57+
58+
print "static const struct drgn_register registers[] = {"
59+
i = 0
60+
# Start from an empty trie array.
split("", trie)
61+
# gawk-specific: iterate registers in ascending order of their numeric
# value. i counts the entries in emission order, so the index stored in the
# trie is the position of that register in the printed registers[] array.
PROCINFO["sorted_in"] = "@val_num_asc"
62+
for (reg in registers) {
63+
print "\t{ \"" reg "\", " registers[reg] ", },"
64+
add_to_trie(trie, reg, i++)
65+
}
66+
print "};"
67+
print ""
68+
69+
# The lookup function body is the nested switch generated from the trie.
print "static const struct drgn_register *register_by_name(const char *p)"
70+
print "{"
71+
trie_to_switch(trie, "\t")
72+
print "}"
73+
print ""
74+
75+
# Designated initializers to be expanded inside the architecture's
# struct drgn_architecture_info definition; i is now the register count.
print "#define ARCHITECTURE_INFO \\"
76+
print "\t.name = \"" arch_name "\", \\"
77+
print "\t.arch = DRGN_ARCH_" toupper(sanitize(arch_name)) ", \\"
78+
print "\t.registers = registers, \\"
79+
print "\t.num_registers = " i ", \\"
80+
print "\t.register_by_name = register_by_name"
81+
82+
# printf rather than print: no extra newline is appended to the epilogue.
printf "%s", epilogue
83+
}

libdrgn/build-aux/gen_drgn_h.awk

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2019 - Omar Sandoval
2+
# SPDX-License-Identifier: GPL-3.0+
3+
4+
# This script generates "drgn.h" from "drgn.h.in" and all of the
5+
# "arch_foo.c.in" files. It replaces @ENUM_DRGN_REGISTER_NUMBER@ with the
6+
# definition generated from the "arch_foo.c.in" files.
7+
8+
# Start the text of the generated enum definition. The enumerators are
# appended as each architecture file finishes, and the closing "};" is added
# after the last one.
BEGIN {
9+
enum = "enum drgn_register_number {\n"
10+
}
11+
12+
# Applies to every line of the last file on the command line (the "drgn.h.in"
# template per the header comment): substitute the accumulated enum text for
# the @ENUM_DRGN_REGISTER_NUMBER@ placeholder and print the line. "next"
# stops any later rules from processing this line.
# NOTE(review): this presumably keeps parse_arch.awk's rules from seeing
# template lines, which relies on this script being loaded first -- confirm.
ARGIND == ARGC - 1 {
13+
gsub(/@ENUM_DRGN_REGISTER_NUMBER@/, enum)
14+
print
15+
next
16+
}
17+
18+
# Runs at the end of every input file. For the last file (the template)
# there is nothing left to do, so exit. For every other file, require the
# name to look like "arch_<name>.c.in" and append one enumerator per register
# to the enum text, named DRGN_REGISTER_<ARCH>_<reg>.
# NOTE(review): registers, arch_name and sanitize() are not defined in this
# script; presumably they are provided by parse_arch.awk -- confirm there.
ENDFILE {
19+
if (ARGIND == ARGC - 1)
20+
exit
21+
# The match is used only as a filename check: the captured groups are not
# read below, and the enumerator prefix is built from arch_name instead.
if (!match(FILENAME, /^([^\/]*\/)*arch_([^\/]*)\.c\.in$/, group)) {
22+
print FILENAME ": error: could not parse architecture name" > "/dev/stderr"
23+
exit 1
24+
}
25+
prefix = "DRGN_REGISTER_" toupper(sanitize(arch_name)) "_"
26+
# gawk-specific: emit the enumerators in ascending order of register number.
PROCINFO["sorted_in"] = "@val_num_asc"
27+
for (reg in registers)
28+
enum = enum "\t" prefix sanitize(reg) " = " registers[reg] ",\n"
29+
# The file just before the template is the last architecture file, so close
# the enum definition here.
if (ARGIND == ARGC - 2)
30+
enum = enum "};"
31+
}

0 commit comments

Comments
 (0)