Skip to content

Commit 369cabf

Browse files
committed
Conditionally htmlEscape chars based on encoding
This commit adds new htmlEscape methods that take the character encoding as a parameter. According to the specs and recommendations, the set of characters that must be HTML-escaped depends on the encoding used in the response. If the current character encoding supports a character natively, that character should not be escaped; the reserved characters (<, >, ', ", &) must, of course, always be escaped. See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3 See: #385 by @candrews Issue: SPR-9293
1 parent 4d3ade5 commit 369cabf

File tree

4 files changed

+133
-7
lines changed

4 files changed

+133
-7
lines changed

spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2002-2012 the original author or authors.
2+
* Copyright 2002-2014 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -107,14 +107,42 @@ public int getSupportedReferenceCount() {
107107
* Return true if the given character is mapped to a supported entity reference.
108108
*/
109109
public boolean isMappedToReference(char character) {
110-
return (convertToReference(character) != null);
110+
return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
111+
}
112+
113+
/**
114+
* Return true if the given character is mapped to a supported entity reference for the given encoding.
115+
*/
116+
public boolean isMappedToReference(char character, String encoding) {
117+
return (convertToReference(character, encoding) != null);
111118
}
112119

113120
/**
114121
* Return the reference mapped to the given character or {@code null}.
115122
*/
116123
public String convertToReference(char character) {
117-
if (character < 1000 || (character >= 8000 && character < 10000)) {
124+
return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
125+
}
126+
127+
/**
128+
* Return the reference mapped to the given character for the given encoding, or {@code null}.
129+
*/
130+
public String convertToReference(char character, String encoding) {
131+
if(encoding.startsWith("UTF-")){
132+
switch(character){
133+
case '<':
134+
return "&lt;";
135+
case '>':
136+
return "&gt;";
137+
case '"':
138+
return "&quot;";
139+
case '&':
140+
return "&amp;";
141+
case '\'':
142+
return "&#39;";
143+
}
144+
}
145+
else if (character < 1000 || (character >= 8000 && character < 10000)) {
118146
int index = (character < 1000 ? character : character - 7000);
119147
String entityReference = this.characterToEntityReferenceMap[index];
120148
if (entityReference != null) {

spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2002-2012 the original author or authors.
2+
* Copyright 2002-2014 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,6 +16,8 @@
1616

1717
package org.springframework.web.util;
1818

19+
import org.springframework.util.Assert;
20+
1921
/**
2022
* Utility class for HTML escaping. Escapes and unescapes
2123
* based on the W3C HTML 4.01 recommendation, handling
@@ -57,13 +59,33 @@ public abstract class HtmlUtils {
5759
* @return the escaped string
5860
*/
5961
public static String htmlEscape(String input) {
62+
return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
63+
}
64+
65+
/**
66+
* Turn special characters into HTML character references.
67+
* Handles complete character set defined in HTML 4.01 recommendation.
68+
* <p>Escapes all special characters to their corresponding
69+
* entity reference (e.g. {@code &lt;}) at least as required by the
70+
* specified encoding. In other words, if a special character does
71+
* not have to be escaped for the given encoding, it may be left unescaped.
72+
* <p>Reference:
73+
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
74+
* http://www.w3.org/TR/html4/sgml/entities.html
75+
* </a>
76+
* @param input the (unescaped) input string
77+
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
78+
* @return the escaped string
79+
*/
80+
public static String htmlEscape(String input, String encoding) {
81+
Assert.notNull(encoding, "encoding is required");
6082
if (input == null) {
6183
return null;
6284
}
6385
StringBuilder escaped = new StringBuilder(input.length() * 2);
6486
for (int i = 0; i < input.length(); i++) {
6587
char character = input.charAt(i);
66-
String reference = characterEntityReferences.convertToReference(character);
88+
String reference = characterEntityReferences.convertToReference(character, encoding);
6789
if (reference != null) {
6890
escaped.append(reference);
6991
}
@@ -87,13 +109,33 @@ public static String htmlEscape(String input) {
87109
* @return the escaped string
88110
*/
89111
public static String htmlEscapeDecimal(String input) {
112+
return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
113+
}
114+
115+
/**
116+
* Turn special characters into HTML character references.
117+
* Handles complete character set defined in HTML 4.01 recommendation.
118+
* <p>Escapes all special characters to their corresponding numeric
119+
* reference in decimal format (&#<i>Decimal</i>;) at least as required by the
120+
* specified encoding. In other words, if a special character does
121+
* not have to be escaped for the given encoding, it may be left unescaped.
122+
* <p>Reference:
123+
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
124+
* http://www.w3.org/TR/html4/sgml/entities.html
125+
* </a>
126+
* @param input the (unescaped) input string
127+
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
128+
* @return the escaped string
129+
*/
130+
public static String htmlEscapeDecimal(String input, String encoding) {
131+
Assert.notNull(encoding, "encoding is required");
90132
if (input == null) {
91133
return null;
92134
}
93135
StringBuilder escaped = new StringBuilder(input.length() * 2);
94136
for (int i = 0; i < input.length(); i++) {
95137
char character = input.charAt(i);
96-
if (characterEntityReferences.isMappedToReference(character)) {
138+
if (characterEntityReferences.isMappedToReference(character, encoding)) {
97139
escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
98140
escaped.append((int) character);
99141
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
@@ -118,13 +160,33 @@ public static String htmlEscapeDecimal(String input) {
118160
* @return the escaped string
119161
*/
120162
public static String htmlEscapeHex(String input) {
163+
return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
164+
}
165+
166+
/**
167+
* Turn special characters into HTML character references.
168+
* Handles complete character set defined in HTML 4.01 recommendation.
169+
* <p>Escapes all special characters to their corresponding numeric
170+
* reference in hex format (&#x<i>Hex</i>;) at least as required by the
171+
* specified encoding. In other words, if a special character does
172+
* not have to be escaped for the given encoding, it may be left unescaped.
173+
* <p>Reference:
174+
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
175+
* http://www.w3.org/TR/html4/sgml/entities.html
176+
* </a>
177+
* @param input the (unescaped) input string
178+
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
179+
* @return the escaped string
180+
*/
181+
public static String htmlEscapeHex(String input, String encoding) {
182+
Assert.notNull(encoding, "encoding is required");
121183
if (input == null) {
122184
return null;
123185
}
124186
StringBuilder escaped = new StringBuilder(input.length() * 2);
125187
for (int i = 0; i < input.length(); i++) {
126188
char character = input.charAt(i);
127-
if (characterEntityReferences.isMappedToReference(character)) {
189+
if (characterEntityReferences.isMappedToReference(character, encoding)) {
128190
escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
129191
escaped.append(Integer.toString(character, 16));
130192
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);

spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,20 @@ else if (character == 39) {
7676
(char) -1, entityReferences.convertToCharacter("invalid"));
7777
}
7878

79+
// SPR-9293
80+
@Test
81+
public void testConvertToReferenceUTF8() {
82+
HtmlCharacterEntityReferences entityReferences = new HtmlCharacterEntityReferences();
83+
String utf8 = "UTF-8";
84+
assertEquals("&lt;", entityReferences.convertToReference('<', utf8));
85+
assertEquals("&gt;", entityReferences.convertToReference('>', utf8));
86+
assertEquals("&amp;", entityReferences.convertToReference('&', utf8));
87+
assertEquals("&quot;", entityReferences.convertToReference('"', utf8));
88+
assertEquals("&#39;", entityReferences.convertToReference('\'', utf8));
89+
assertNull(entityReferences.convertToReference((char) 233, utf8));
90+
assertNull(entityReferences.convertToReference((char) 934, utf8));
91+
}
92+
7993
private Map<Integer, String> getReferenceCharacterMap() {
8094
CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator();
8195
Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>();

spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ public void testEncodeIntoHtmlCharacterSet() {
7171
"&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977));
7272
}
7373

74+
// SPR-9293
75+
@Test
76+
public void testEncodeIntoHtmlCharacterSetFromUtf8() {
77+
String utf8 = ("UTF-8");
78+
assertNull("A null string should be converted to a null string",
79+
HtmlUtils.htmlEscape(null, utf8));
80+
assertEquals("An empty string should be converted to an empty string",
81+
"", HtmlUtils.htmlEscape("", utf8));
82+
assertEquals("A string containing no special characters should not be affected",
83+
"A sentence containing no special characters.",
84+
HtmlUtils.htmlEscape("A sentence containing no special characters."));
85+
86+
assertEquals("'< >' should be encoded to '&lt; &gt;'",
87+
"&lt; &gt;", HtmlUtils.htmlEscape("< >", utf8));
88+
assertEquals("'< >' should be encoded to '&#60; &#62;'",
89+
"&#60; &#62;", HtmlUtils.htmlEscapeDecimal("< >", utf8));
90+
91+
assertEquals("UTF-8 supported chars should not be escaped",
92+
"Μερικοί Ελληνικοί &quot;χαρακτήρες&quot;",
93+
HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8));
94+
}
95+
7496
@Test
7597
public void testDecodeFromHtmlCharacterSet() {
7698
assertNull("A null string should be converted to a null string",

0 commit comments

Comments
 (0)