@@ -168,57 +168,63 @@ def sanitize_token(self, token):
168168 if token_type in (tokenTypes ["StartTag" ], tokenTypes ["EndTag" ],
169169 tokenTypes ["EmptyTag" ]):
170170 if token ["name" ] in self .allowed_elements :
171- if "data" in token :
172- attrs = dict ([(name ,val ) for name ,val in
173- token ["data" ][::- 1 ]
174- if name in self .allowed_attributes ])
175- for attr in self .attr_val_is_uri :
176- if attr not in attrs :
177- continue
178- val_unescaped = re .sub ("[`\000 -\040 \177 -\240 \s]+" , '' ,
179- unescape (attrs [attr ])).lower ()
180- #remove replacement characters from unescaped characters
181- val_unescaped = val_unescaped .replace ("\ufffd " , "" )
182- if (re .match ("^[a-z0-9][-+.a-z0-9]*:" ,val_unescaped ) and
183- (val_unescaped .split (':' )[0 ] not in
184- self .allowed_protocols )):
185- del attrs [attr ]
186- for attr in self .svg_attr_val_allows_ref :
187- if attr in attrs :
188- attrs [attr ] = re .sub (r'url\s*\(\s*[^#\s][^)]+?\)' ,
189- ' ' ,
190- unescape (attrs [attr ]))
191- if (token ["name" ] in self .svg_allow_local_href and
192- 'xlink:href' in attrs and re .search ('^\s*[^#\s].*' ,
193- attrs ['xlink:href' ])):
194- del attrs ['xlink:href' ]
195- if 'style' in attrs :
196- attrs ['style' ] = self .sanitize_css (attrs ['style' ])
197- token ["data" ] = [[name ,val ] for name ,val in list (attrs .items ())]
198- return token
171+ return self .allowed_token (token , token_type )
199172 else :
200- if token_type == tokenTypes ["EndTag" ]:
201- token ["data" ] = "</%s>" % token ["name" ]
202- elif token ["data" ]:
203- attrs = '' .join ([' %s="%s"' % (k ,escape (v )) for k ,v in token ["data" ]])
204- token ["data" ] = "<%s%s>" % (token ["name" ],attrs )
205- else :
206- token ["data" ] = "<%s>" % token ["name" ]
207- if token .get ("selfClosing" ):
208- token ["data" ]= token ["data" ][:- 1 ] + "/>"
209-
210- if token ["type" ] in list (tokenTypes .keys ()):
211- token ["type" ] = "Characters"
212- else :
213- token ["type" ] = tokenTypes ["Characters" ]
214-
215- del token ["name" ]
216- return token
173+ return self .disallowed_token (token , token_type )
217174 elif token_type == tokenTypes ["Comment" ]:
218175 pass
219176 else :
220177 return token
221178
def allowed_token(self, token, token_type):
    """Sanitize a start/end/empty tag token whose element name is allowed.

    Keeps only attributes listed in ``self.allowed_attributes``; drops
    URI-valued attributes whose scheme is not in ``self.allowed_protocols``;
    strips non-local ``url(...)`` references from SVG attributes that allow
    references; removes non-local ``xlink:href`` values on elements in
    ``self.svg_allow_local_href``; and runs any ``style`` attribute through
    ``self.sanitize_css``.

    :param token: tokenizer token dict (mutated in place and returned)
    :param token_type: numeric token type (unused here; kept so the
        signature mirrors ``disallowed_token``)
    :returns: the sanitized token
    """
    if "data" in token:
        # Iterate the attribute list reversed so that, for duplicate
        # attribute names, the first occurrence in source order wins.
        attrs = {name: val for name, val in token["data"][::-1]
                 if name in self.allowed_attributes}
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip control/space characters that browsers ignore when
            # parsing URLs, so schemes like "java\0script:" can't sneak
            # past the protocol check.  Raw strings keep the \000/\s
            # escapes for the regex engine (non-raw forms warn on 3.12+).
            val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # Remove replacement characters from unescaped characters.
            val_unescaped = val_unescaped.replace("\ufffd", "")
            if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                (val_unescaped.split(':')[0] not in
                 self.allowed_protocols)):
                del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Blank out url(...) references that point outside the
                # document (anything not starting with '#').
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
            'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
                                                attrs['xlink:href'])):
            # Only same-document (fragment) xlink:href values survive.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in attrs.items()]
    return token
208+
def disallowed_token(self, token, token_type):
    """Convert a disallowed tag token into an escaped Characters token.

    Rebuilds the tag's source text — including attributes and a trailing
    ``/`` for self-closing tags — into ``token["data"]`` and retypes the
    token as Characters, so the tag is emitted as visible text instead of
    markup.

    :param token: tokenizer token dict (mutated in place and returned;
        ``token["name"]`` is deleted)
    :param token_type: numeric token type (one of the ``tokenTypes`` values)
    :returns: the converted token
    """
    if token_type == tokenTypes["EndTag"]:
        token["data"] = "</%s>" % token["name"]
    elif token["data"]:
        # NOTE(review): xml.sax.saxutils.escape does not escape quote
        # characters by default — acceptable here only because this text
        # is re-escaped when the Characters token is serialized; confirm.
        attrs = ''.join(' %s="%s"' % (k, escape(v)) for k, v in token["data"])
        token["data"] = "<%s%s>" % (token["name"], attrs)
    else:
        token["data"] = "<%s>" % token["name"]
    if token.get("selfClosing"):
        token["data"] = token["data"][:-1] + "/>"

    # Token streams may carry the type either as a name string or as the
    # numeric code; mirror whichever convention this token already uses.
    # (Membership in the dict tests keys directly — no list() needed.)
    if token["type"] in tokenTypes:
        token["type"] = "Characters"
    else:
        token["type"] = tokenTypes["Characters"]

    del token["name"]
    return token
227+
222228 def sanitize_css (self , style ):
223229 # disallow urls
224230 style = re .compile ('url\s*\(\s*[^\s)]+?\s*\)\s*' ).sub (' ' ,style )
0 commit comments