Skip to content

Commit 1052c2b

Browse files
New merge-emphasis & coerce-endtags options added.
Fixes #19.
1 parent: 3ed33a1 · commit: 1052c2b

File tree

7 files changed

+80
-5
lines changed

7 files changed

+80
-5
lines changed

include/tidyenum.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ typedef enum
112112
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
113113
TidyIndentContent, /**< Indent content of appropriate tags */
114114
/**< "auto" does text/block level content indentation */
115+
TidyCoerceEndTags, /**< Coerce end tags from start tags where probably intended */
115116
TidyHideEndTags, /**< Suppress optional end tags */
116117
TidyXmlTags, /**< Treat input as XML */
117118
TidyXmlOut, /**< Create output as XML */
@@ -199,6 +200,7 @@ typedef enum
199200
#else
200201
TidyPunctWrapNotUsed,
201202
#endif
203+
TidyMergeEmphasis, /**< Merge nested B and I elements */
202204
TidyMergeDivs, /**< Merge multiple DIVs */
203205
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
204206
TidyPreserveEntities, /**< Preserve entities */

quickref.html

Lines changed: 49 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
<body>
99
<h1 id="top">Quick Reference</h1>
1010
<h2>HTML Tidy Configuration Options</h2>
11-
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/5c4d2e5">https://github.com/w3c/tidy-html5/tree/5c4d2e5</a></p>
11+
<p>Version: <a href="https://github.com/w3c/tidy-html5/tree/3a9a794">https://github.com/w3c/tidy-html5/tree/3a9a794</a></p>
1212
<p>
1313
<a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a>
1414
<br />
@@ -88,6 +88,13 @@ <h2>HTML Tidy Configuration Options</h2>
8888
<td>Boolean</td>
8989
<td>no</td>
9090
</tr>
91+
<tr>
92+
<td>
93+
<a href="#coerce-endtags">coerce-endtags</a>
94+
</td>
95+
<td>Boolean</td>
96+
<td>yes</td>
97+
</tr>
9198
<tr>
9299
<td>
93100
<a href="#css-prefix">css-prefix</a>
@@ -251,6 +258,13 @@ <h2>HTML Tidy Configuration Options</h2>
251258
<td>AutoBool</td>
252259
<td>auto</td>
253260
</tr>
261+
<tr>
262+
<td>
263+
<a href="#merge-emphasis">merge-emphasis</a>
264+
</td>
265+
<td>Boolean</td>
266+
<td>yes</td>
267+
</tr>
254268
<tr>
255269
<td>
256270
<a href="#merge-spans">merge-spans</a>
@@ -866,6 +880,23 @@ <h2>HTML Tidy Configuration Options</h2>
866880
<tr>
867881
<td>&#160;</td>
868882
</tr>
883+
<tr>
884+
<td class="tabletitle" valign="top" id="coerce-endtags">coerce-endtags</td>
885+
<td class="tabletitlelink" valign="top" align="right">
886+
<a href="#top">Top</a>
887+
</td>
888+
</tr>
889+
<tr>
890+
<td valign="top">Type: <strong>Boolean</strong><br />
891+
Default: <strong>yes</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
892+
<td align="right" valign="top"></td>
893+
</tr>
894+
<tr>
895+
<td colspan="2">This option specifies if Tidy should coerce a start tag into an end tag in cases where it looks like an end tag was probably intended; for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. </td>
896+
</tr>
897+
<tr>
898+
<td>&#160;</td>
899+
</tr>
869900
<tr>
870901
<td class="tabletitle" valign="top" id="css-prefix">css-prefix</td>
871902
<td class="tabletitlelink" valign="top" align="right">
@@ -1271,6 +1302,23 @@ <h2>HTML Tidy Configuration Options</h2>
12711302
<tr>
12721303
<td>&#160;</td>
12731304
</tr>
1305+
<tr>
1306+
<td class="tabletitle" valign="top" id="merge-emphasis">merge-emphasis</td>
1307+
<td class="tabletitlelink" valign="top" align="right">
1308+
<a href="#top">Top</a>
1309+
</td>
1310+
</tr>
1311+
<tr>
1312+
<td valign="top">Type: <strong>Boolean</strong><br />
1313+
Default: <strong>yes</strong><br />Example: <strong>y/n, yes/no, t/f, true/false, 1/0</strong></td>
1314+
<td align="right" valign="top"></td>
1315+
</tr>
1316+
<tr>
1317+
<td colspan="2">This option specifies if Tidy should merge nested &lt;b&gt; and &lt;i&gt; elements; for example, for the case &lt;b class="rtop-2"&gt;foo &lt;b class="r2-2"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, Tidy will output &lt;b class="rtop-2"&gt;foo bar baz&lt;/b&gt;. Note that if you set this option to "no" Tidy will by default still attempt to correct some cases of nested &lt;b&gt; and &lt;i&gt; elements; for example, given &lt;span&gt;foo &lt;b&gt;bar &lt;b&gt;baz&lt;/b&gt;&lt;/b&gt; &lt;/span&gt;, Tidy will output &lt;span&gt;foo &lt;b&gt;bar baz&lt;/b&gt;&lt;/span&gt;. To suppress that behavior, set the coerce-endtags option to "no". </td>
1318+
</tr>
1319+
<tr>
1320+
<td>&#160;</td>
1321+
</tr>
12741322
<tr>
12751323
<td class="tabletitle" valign="top" id="merge-spans">merge-spans</td>
12761324
<td class="tabletitlelink" valign="top" align="right">

src/config.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,7 @@ static const TidyOptionImpl option_defs[] =
239239
{ TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
240240
{ TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
241241
{ TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
242+
{ TidyCoerceEndTags, MU, "coerce-endtags", BL, yes, ParseBool, boolPicks },
242243
{ TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
243244
{ TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
244245
{ TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
@@ -311,6 +312,7 @@ static const TidyOptionImpl option_defs[] =
311312
#if SUPPORT_ASIAN_ENCODINGS
312313
{ TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
313314
#endif
315+
{ TidyMergeEmphasis, MU, "merge-emphasis", BL, yes, ParseBool, boolPicks },
314316
{ TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
315317
{ TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
316318
{ TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },

src/localize.c

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -470,6 +470,12 @@ static const TidyOptionDoc option_docs[] =
470470
{TidyHideComments,
471471
"This option specifies if Tidy should print out comments. "
472472
},
473+
{TidyCoerceEndTags,
474+
"This option specifies if Tidy should coerce a start tag into an end tag "
475+
"in cases where it looks like an end tag was probably intended; "
476+
"for example, given &lt;span&gt;foo &lt;b&gt;bar&lt;b&gt; baz&lt;/span&gt;, "
477+
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar&lt;/b&gt; baz&lt;/span&gt;. "
478+
},
473479
{TidyHideEndTags,
474480
"This option specifies if Tidy should omit optional end-tags when "
475481
"generating the pretty printed markup. This option is ignored if you are "
@@ -504,6 +510,18 @@ static const TidyOptionDoc option_docs[] =
504510
"that takes a list of predefined values to lower case. This is required "
505511
"for XHTML documents. "
506512
},
513+
{TidyMergeEmphasis,
514+
"This option specifies if Tidy should merge nested &lt;b&gt; and &lt;i&gt; "
515+
"elements; for example, for the case "
516+
"&lt;b class=\"rtop-2\"&gt;foo &lt;b class=\"r2-2\"&gt;bar&lt;/b&gt; baz&lt;/b&gt;, "
517+
"Tidy will output &lt;b class=\"rtop-2\"&gt;foo bar baz&lt;/b&gt;. "
518+
"Note that if you set this option to \"no\" Tidy will by default still "
519+
"attempt to correct some cases of nested &lt;b&gt; and &lt;i&gt; elements; "
520+
"for example, given "
521+
"&lt;span&gt;foo &lt;b&gt;bar &lt;b&gt;baz&lt;/b&gt;&lt;/b&gt; &lt;/span&gt;, "
522+
"Tidy will output &lt;span&gt;foo &lt;b&gt;bar baz&lt;/b&gt;&lt;/span&gt;. "
523+
"To suppress that behavior, set the coerce-endtags option to \"no\". "
524+
},
507525
{TidyMergeDivs,
508526
"Can be used to modify behavior of -c (--clean yes) option. "
509527
"This option specifies if Tidy should merge nested &lt;div&gt; such as "

src/parser.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1400,6 +1400,7 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
14001400
&& !nodeIsSUP(node)
14011401
&& !nodeIsQ(node)
14021402
&& !nodeIsSPAN(node)
1403+
&& cfgBool(doc, TidyCoerceEndTags)
14031404
)
14041405
{
14051406
/* proceeds only if "node" does not have any attribute and
@@ -1617,7 +1618,8 @@ void TY_(ParseInline)( TidyDocImpl* doc, Node *element, GetTokenMode mode )
16171618
/* #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 */
16181619
/* other fixes by Dave Raggett */
16191620
/* if (node->attributes == NULL) */
1620-
if (node->type != EndTag && node->attributes == NULL)
1621+
if (node->type != EndTag && node->attributes == NULL
1622+
&& cfgBool(doc, TidyCoerceEndTags) )
16211623
{
16221624
node->type = EndTag;
16231625
TY_(ReportError)(doc, element, node, COERCE_TO_ENDTAG);
@@ -3022,7 +3024,8 @@ void TY_(ParseTitle)(TidyDocImpl* doc, Node *title, GetTokenMode ARG_UNUSED(mode
30223024
Node *node;
30233025
while ((node = TY_(GetToken)(doc, MixedContent)) != NULL)
30243026
{
3025-
if (node->tag == title->tag && node->type == StartTag)
3027+
if (node->tag == title->tag && node->type == StartTag
3028+
&& cfgBool(doc, TidyCoerceEndTags) )
30263029
{
30273030
TY_(ReportError)(doc, title, node, COERCE_TO_ENDTAG);
30283031
node->type = EndTag;

src/tidylib.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1246,13 +1246,15 @@ int tidyDocCleanAndRepair( TidyDocImpl* doc )
12461246
Bool tidyMark = cfgBool( doc, TidyMark );
12471247
Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
12481248
Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
1249+
Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
12491250
Node* node;
12501251

12511252
if (tidyXmlTags)
12521253
return tidyDocStatus( doc );
12531254

12541255
/* simplifies <b><b> ... </b> ...</b> etc. */
1255-
TY_(NestedEmphasis)( doc, &doc->root );
1256+
if ( mergeEmphasis )
1257+
TY_(NestedEmphasis)( doc, &doc->root );
12561258

12571259
/* cleans up <dir>indented text</dir> etc. */
12581260
TY_(List2BQ)( doc, &doc->root );

src/version.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3a9a794";
1+
static const char TY_(release_date)[] = "https://github.com/w3c/tidy-html5/tree/3ed33a1";

0 commit comments

Comments
 (0)