@@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n);
5050#include "rust-demangle.h"
5151
5252
53- /* Mangled Rust symbols look like this:
53+ /* Mangled (legacy) Rust symbols look like this:
5454 _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
5555
5656 The original symbol is:
@@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n);
7474 ">" => $GT$
7575 "(" => $LP$
7676 ")" => $RP$
77- " " => $u20$
78- "\"" => $u22$
79- "'" => $u27$
80- "+" => $u2b$
81- ";" => $u3b$
82- "[" => $u5b$
83- "]" => $u5d$
84- "{" => $u7b$
85- "}" => $u7d$
86- "~" => $u7e$
77+ "\u{XY}" => $uXY$
8778
8879 A double ".." means "::" and a single "." means "-".
8980
@@ -95,7 +86,8 @@ static const size_t hash_len = 16;
9586
9687static int is_prefixed_hash (const char * start );
9788static int looks_like_rust (const char * sym , size_t len );
98- static int unescape (const char * * in , char * * out , const char * seq , char value );
89+ static int parse_lower_hex_nibble (char nibble );
90+ static char parse_legacy_escape (const char * * in );
9991
10092/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
10193
@@ -149,20 +141,20 @@ is_prefixed_hash (const char *str)
149141 const char * end ;
150142 char seen [16 ];
151143 size_t i ;
152- int count ;
144+ int count , nibble ;
153145
154146 if (strncmp (str , hash_prefix , hash_prefix_len ))
155147 return 0 ;
156148 str += hash_prefix_len ;
157149
158150 memset (seen , 0 , sizeof (seen ));
159151 for (end = str + hash_len ; str < end ; str ++ )
160- if ( * str >= '0' && * str <= '9' )
161- seen [ * str - '0' ] = 1 ;
162- else if (* str >= 'a' && * str <= 'f' )
163- seen [ * str - 'a' + 10 ] = 1 ;
164- else
165- return 0 ;
152+ {
153+ nibble = parse_lower_hex_nibble ( * str ) ;
154+ if (nibble < 0 )
155+ return 0 ;
156+ seen [ nibble ] = 1 ;
157+ }
166158
167159 /* Count how many distinct digits seen */
168160 count = 0 ;
@@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len)
179171 const char * end = str + len ;
180172
181173 while (str < end )
182- switch (* str )
183- {
184- case '$' :
185- if (!strncmp (str , "$C$" , 3 ))
186- str += 3 ;
187- else if (!strncmp (str , "$SP$" , 4 )
188- || !strncmp (str , "$BP$" , 4 )
189- || !strncmp (str , "$RF$" , 4 )
190- || !strncmp (str , "$LT$" , 4 )
191- || !strncmp (str , "$GT$" , 4 )
192- || !strncmp (str , "$LP$" , 4 )
193- || !strncmp (str , "$RP$" , 4 ))
194- str += 4 ;
195- else if (!strncmp (str , "$u20$" , 5 )
196- || !strncmp (str , "$u22$" , 5 )
197- || !strncmp (str , "$u27$" , 5 )
198- || !strncmp (str , "$u2b$" , 5 )
199- || !strncmp (str , "$u3b$" , 5 )
200- || !strncmp (str , "$u5b$" , 5 )
201- || !strncmp (str , "$u5d$" , 5 )
202- || !strncmp (str , "$u7b$" , 5 )
203- || !strncmp (str , "$u7d$" , 5 )
204- || !strncmp (str , "$u7e$" , 5 ))
205- str += 5 ;
206- else
207- return 0 ;
208- break ;
209- case '.' :
210- /* Do not allow three or more consecutive dots */
211- if (!strncmp (str , "..." , 3 ))
212- return 0 ;
213- /* Fall through */
214- case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
215- case 'g' : case 'h' : case 'i' : case 'j' : case 'k' : case 'l' :
216- case 'm' : case 'n' : case 'o' : case 'p' : case 'q' : case 'r' :
217- case 's' : case 't' : case 'u' : case 'v' : case 'w' : case 'x' :
218- case 'y' : case 'z' :
219- case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
220- case 'G' : case 'H' : case 'I' : case 'J' : case 'K' : case 'L' :
221- case 'M' : case 'N' : case 'O' : case 'P' : case 'Q' : case 'R' :
222- case 'S' : case 'T' : case 'U' : case 'V' : case 'W' : case 'X' :
223- case 'Y' : case 'Z' :
224- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' :
225- case '6' : case '7' : case '8' : case '9' :
226- case '_' :
227- case ':' :
228- str ++ ;
229- break ;
230- default :
231- return 0 ;
232- }
174+ {
175+ if (* str == '$' )
176+ {
177+ if (!parse_legacy_escape (& str ))
178+ return 0 ;
179+ }
180+ else if (* str == '.' || * str == '_' || * str == ':' || ISALNUM (* str ))
181+ str ++ ;
182+ else
183+ return 0 ;
184+ }
233185
234186 return 1 ;
235187}
@@ -246,6 +198,7 @@ rust_demangle_sym (char *sym)
246198 const char * in ;
247199 char * out ;
248200 const char * end ;
201+ char unescaped ;
249202
250203 if (!sym )
251204 return ;
@@ -255,75 +208,49 @@ rust_demangle_sym (char *sym)
255208 end = sym + strlen (sym ) - (hash_prefix_len + hash_len );
256209
257210 while (in < end )
258- switch (* in )
259- {
260- case '$' :
261- if (!(unescape (& in , & out , "$C$" , ',' )
262- || unescape (& in , & out , "$SP$" , '@' )
263- || unescape (& in , & out , "$BP$" , '*' )
264- || unescape (& in , & out , "$RF$" , '&' )
265- || unescape (& in , & out , "$LT$" , '<' )
266- || unescape (& in , & out , "$GT$" , '>' )
267- || unescape (& in , & out , "$LP$" , '(' )
268- || unescape (& in , & out , "$RP$" , ')' )
269- || unescape (& in , & out , "$u20$" , ' ' )
270- || unescape (& in , & out , "$u22$" , '\"' )
271- || unescape (& in , & out , "$u27$" , '\'' )
272- || unescape (& in , & out , "$u2b$" , '+' )
273- || unescape (& in , & out , "$u3b$" , ';' )
274- || unescape (& in , & out , "$u5b$" , '[' )
275- || unescape (& in , & out , "$u5d$" , ']' )
276- || unescape (& in , & out , "$u7b$" , '{' )
277- || unescape (& in , & out , "$u7d$" , '}' )
278- || unescape (& in , & out , "$u7e$" , '~' ))) {
279- /* unexpected escape sequence, not looks_like_rust. */
280- goto fail ;
281- }
282- break ;
283- case '_' :
284- /* If this is the start of a path component and the next
285- character is an escape sequence, ignore the underscore. The
286- mangler inserts an underscore to make sure the path
287- component begins with a XID_Start character. */
288- if ((in == sym || in [-1 ] == ':' ) && in [1 ] == '$' )
289- in ++ ;
290- else
291- * out ++ = * in ++ ;
292- break ;
293- case '.' :
294- if (in [1 ] == '.' )
295- {
296- /* ".." becomes "::" */
297- * out ++ = ':' ;
298- * out ++ = ':' ;
299- in += 2 ;
300- }
301- else
302- {
303- /* "." becomes "-" */
304- * out ++ = '-' ;
305- in ++ ;
306- }
307- break ;
308- case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
309- case 'g' : case 'h' : case 'i' : case 'j' : case 'k' : case 'l' :
310- case 'm' : case 'n' : case 'o' : case 'p' : case 'q' : case 'r' :
311- case 's' : case 't' : case 'u' : case 'v' : case 'w' : case 'x' :
312- case 'y' : case 'z' :
313- case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
314- case 'G' : case 'H' : case 'I' : case 'J' : case 'K' : case 'L' :
315- case 'M' : case 'N' : case 'O' : case 'P' : case 'Q' : case 'R' :
316- case 'S' : case 'T' : case 'U' : case 'V' : case 'W' : case 'X' :
317- case 'Y' : case 'Z' :
318- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' :
319- case '6' : case '7' : case '8' : case '9' :
320- case ':' :
321- * out ++ = * in ++ ;
322- break ;
323- default :
324- /* unexpected character in symbol, not looks_like_rust. */
325- goto fail ;
326- }
211+ {
212+ if (* in == '$' )
213+ {
214+ unescaped = parse_legacy_escape (& in );
215+ if (unescaped )
216+ * out ++ = unescaped ;
217+ else
218+ /* unexpected escape sequence, not looks_like_rust. */
219+ goto fail ;
220+ }
221+ else if (* in == '_' )
222+ {
223+ /* If this is the start of a path component and the next
224+ character is an escape sequence, ignore the underscore. The
225+ mangler inserts an underscore to make sure the path
226+ component begins with a XID_Start character. */
227+ if ((in == sym || in [-1 ] == ':' ) && in [1 ] == '$' )
228+ in ++ ;
229+ else
230+ * out ++ = * in ++ ;
231+ }
232+ else if (* in == '.' )
233+ {
234+ if (in [1 ] == '.' )
235+ {
236+ /* ".." becomes "::" */
237+ * out ++ = ':' ;
238+ * out ++ = ':' ;
239+ in += 2 ;
240+ }
241+ else
242+ {
243+ /* "." becomes "-" */
244+ * out ++ = '-' ;
245+ in ++ ;
246+ }
247+ }
248+ else if (* in == ':' || ISALNUM (* in ))
249+ * out ++ = * in ++ ;
250+ else
251+ /* unexpected character in symbol, not looks_like_rust. */
252+ goto fail ;
253+ }
327254 goto done ;
328255
329256fail :
@@ -332,18 +259,78 @@ rust_demangle_sym (char *sym)
332259 * out = '\0' ;
333260}
334261
/* Return the value 0x0-0xf of a lowercase hexadecimal digit ('0'-'9' or
   'a'-'f'), or -1 if NIBBLE is any other character.  Uppercase 'A'-'F'
   is deliberately rejected: only lowercase digits are accepted here.  */
static int
parse_lower_hex_nibble (char nibble)
{
  if ('0' <= nibble && nibble <= '9')
    return nibble - '0';
  if ('a' <= nibble && nibble <= 'f')
    return 0xa + (nibble - 'a');
  return -1;
}

/* Decode one legacy "$...$" escape sequence starting at *IN.

   Recognised forms:
     $C$                          => ','
     $SP$ $BP$ $RF$               => '@' '*' '&'
     $LT$ $GT$ $LP$ $RP$          => '<' '>' '(' ')'
     $uXY$  (X, Y lowercase hex)  => the printable ASCII character 0xXY

   On success, return the unescaped character and advance *IN past the
   closing '$'.  On failure, return 0 and leave *IN untouched.  The input
   must be NUL-terminated; every read past the leading '$' is guarded by a
   preceding successful comparison, so the terminator is never overrun.  */
static char
parse_legacy_escape (const char **in)
{
  char c = 0;
  const char *e;
  size_t escape_len = 0;
  int lo_nibble = -1, hi_nibble = -1;

  if ((*in)[0] != '$')
    return 0;

  /* E points at the escape name, just after the opening '$'.  */
  e = *in + 1;

  if (e[0] == 'C')
    {
      escape_len = 1;

      c = ',';
    }
  else
    {
      escape_len = 2;

      if (e[0] == 'S' && e[1] == 'P')
	c = '@';
      else if (e[0] == 'B' && e[1] == 'P')
	c = '*';
      else if (e[0] == 'R' && e[1] == 'F')
	c = '&';
      else if (e[0] == 'L' && e[1] == 'T')
	c = '<';
      else if (e[0] == 'G' && e[1] == 'T')
	c = '>';
      else if (e[0] == 'L' && e[1] == 'P')
	c = '(';
      else if (e[0] == 'R' && e[1] == 'P')
	c = ')';
      else if (e[0] == 'u')
	{
	  escape_len = 3;

	  hi_nibble = parse_lower_hex_nibble (e[1]);
	  if (hi_nibble < 0)
	    return 0;
	  lo_nibble = parse_lower_hex_nibble (e[2]);
	  if (lo_nibble < 0)
	    return 0;

	  /* Only allow non-control ASCII characters: reject anything
	     outside 7-bit ASCII, the C0 controls (< 0x20), and DEL
	     (0x7f), which is a control character as well.  */
	  if (hi_nibble > 7)
	    return 0;
	  c = (char) ((hi_nibble << 4) | lo_nibble);
	  if (c < 0x20 || c == 0x7f)
	    return 0;
	}
    }

  /* An unknown escape name leaves C at 0; a known one must still be
     terminated by '$'.  */
  if (!c || e[escape_len] != '$')
    return 0;

  /* Skip the opening '$', the name, and the closing '$'.  */
  *in += 2 + escape_len;
  return c;
}
0 commit comments