Skip to content

Commit 264c9bc

Browse files
HTML IDs can contain anything except whitespace.
Introduced TY_(IsHTMLSpace)(uint c), which checks whether c is one of the characters that the HTML spec (and browsers) treat as a space in attribute values: 0x020 (space), 0x009 (tab), 0x00a (LF), 0x00c (FF), or 0x00d (CR). ANSI C isspace(int c) can't be used here because, like the standard functions in many other languages, it also treats 0x00b (vertical tab) as a space.
1 parent e84a6d2 commit 264c9bc

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

src/attrs.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,11 +1367,8 @@ Bool TY_(IsValidHTMLID)(ctmbstr id)
13671367
if (!s)
13681368
return no;
13691369

1370-
if (!TY_(IsLetter)(*s++))
1371-
return no;
1372-
13731370
while (*s)
1374-
if (!TY_(IsNamechar)(*s++))
1371+
if (TY_(IsHTMLSpace)(*s++))
13751372
return no;
13761373

13771374
return yes;

src/lexer.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ Bool TY_(IsLetter)(uint c)
230230
return (map & letter)!=0;
231231
}
232232

233+
Bool TY_(IsHTMLSpace)(uint c)
234+
{
235+
return c == 0x020 || c == 0x009 || c == 0x00a || c == 0x00c || c == 0x00d;
236+
}
237+
233238
Bool TY_(IsNamechar)(uint c)
234239
{
235240
uint map = MAP(c);

src/lexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
416416
Bool TY_(IsWhite)(uint c);
417417
Bool TY_(IsDigit)(uint c);
418418
Bool TY_(IsLetter)(uint c);
419+
Bool TY_(IsHTMLSpace)(uint c);
419420
Bool TY_(IsNewline)(uint c);
420421
Bool TY_(IsNamechar)(uint c);
421422
Bool TY_(IsXMLLetter)(uint c);

0 commit comments

Comments
 (0)