Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
AllHtmlTerms.Rule = TopLevelInlineDeclaration
| PBlockDeclaration
| PreBlockDeclaration
| IgnorableElementDeclaration
;

var inlineDeclaration = new NonTerminal ("<html inline decl>", ConcatChildNodes) {
Expand Down Expand Up @@ -102,41 +101,36 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)

InlineHyperLinkDeclaration.Rule = InlineHyperLinkOpenTerm + InlineDeclarations + CreateEndElement ("a", grammar, optional: true);
InlineHyperLinkDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var unparsedAElementValue = string.Empty;
foreach (var cn in parseNode.ChildNodes) {
if (cn.ChildNodes?.Count > 1) {
foreach (var gcn in cn.ChildNodes) {
unparsedAElementValue += gcn.AstNode?.ToString ();
}
} else {
unparsedAElementValue += cn.AstNode?.ToString ();
}
}
var nodesAsString = GetChildNodesAsString (parseNode);
var tokenValue = parseNode.ChildNodes [0].Token.Text;
int stopIndex = nodesAsString.IndexOf ('>');

var seeElement = TryParseHRef (unparsedAElementValue);
if (seeElement == null)
seeElement = TryParseHRef (WebUtility.HtmlDecode (unparsedAElementValue), logError: true);
if (stopIndex == -1 || !tokenValue.Contains ("href", StringComparison.OrdinalIgnoreCase)) {
parseNode.AstNode = new XText (nodesAsString);
return;
}

var hrefValue = seeElement?.Attribute ("href")?.Value ?? string.Empty;
if (!string.IsNullOrEmpty (hrefValue) &&
(hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) {
parseNode.AstNode = seeElement;
var attributeName = parseNode.ChildNodes [0].Term.Name;
var attributeValue = nodesAsString.Substring (0, stopIndex).Trim ().Trim ('\'', '"');
var elementValue = nodesAsString.Substring (stopIndex + 1);
if (!string.IsNullOrEmpty (attributeValue) && attributeValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase)) {
var unparsed = $"<see href=\"{attributeValue}\">{elementValue}</see>";
XNode? seeElement = TryParseElement (unparsed);
if (seeElement == null) {
// Try to parse with HTML entities decoded
seeElement = TryParseElement (WebUtility.HtmlDecode (unparsed));
if (seeElement == null) {
// Finally, try to parse with only the element value encoded
seeElement = TryParseElement ($"<see href=\"{attributeValue}\">{WebUtility.HtmlEncode (elementValue)}</see>", logError: true);
}
}
parseNode.AstNode = seeElement ?? new XText (nodesAsString);
} else {
// TODO: Need to convert relative paths or code references to appropriate CREF value.
parseNode.AstNode = new XText (unparsedAElementValue);
parseNode.AstNode = new XText (elementValue);
}
};

// Start to trim out unusable HTML elements/tags, but not any inner values
IgnorableElementDeclaration.Rule =
CreateStartElementIgnoreAttribute ("a", "name") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true)
| CreateStartElementIgnoreAttribute ("a", "id") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true)
;
IgnorableElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var aElementValue = new XText (parseNode.ChildNodes [1].AstNode.ToString () ?? string.Empty);
parseNode.AstNode = aElementValue;
};

CodeElementDeclaration.Rule = CreateStartElement ("code", grammar) + InlineDeclarations + CreateEndElement ("code", grammar);
CodeElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var target = parseNode.ChildNodes [1].AstNode;
Expand Down Expand Up @@ -184,13 +178,28 @@ static IEnumerable<XElement> GetParagraphs (ParseTreeNodeList children)
}
}

static XElement? TryParseHRef (string unparsedAElementValue, bool logError = false)
/// <summary>
/// Concatenates the string form of the AST nodes found under <paramref name="parseNode"/>.
/// </summary>
/// <param name="parseNode">Parse node whose direct children are visited in order.</param>
/// <returns>
/// The concatenated <c>AstNode.ToString ()</c> values. A child that has more than one
/// child of its own contributes its grandchildren's AST nodes instead of its own;
/// a null <c>AstNode</c> contributes nothing.
/// </returns>
static string GetChildNodesAsString (ParseTreeNode parseNode)
{
	// Accumulate into a builder rather than re-allocating a string for every node.
	var unparsed = new global::System.Text.StringBuilder ();
	foreach (var cn in parseNode.ChildNodes) {
		var grandChildren = cn.ChildNodes;
		if (grandChildren == null || grandChildren.Count <= 1) {
			// Zero or one grandchild: use the child's own AST node.
			unparsed.Append (cn.AstNode?.ToString ());
			continue;
		}
		foreach (var gcn in grandChildren) {
			unparsed.Append (gcn.AstNode?.ToString ());
		}
	}
	return unparsed.ToString ();
}

static XElement? TryParseElement (string unparsed, bool logError = false)
{
try {
return XElement.Parse ($"<see href={unparsedAElementValue}</see>");
return XElement.Parse (unparsed);
} catch (Exception x) {
if (logError)
Console.Error.WriteLine ($"## Unable to parse HTML element: <see href={unparsedAElementValue}</see>\n{x.GetType ()}: {x.Message}");
Console.Error.WriteLine ($"## Unable to parse HTML element: `{unparsed}`\n{x.GetType ()}: {x.Message}");
return null;
}
}
Expand Down Expand Up @@ -221,10 +230,9 @@ static IEnumerable<XElement> GetParagraphs (ParseTreeNodeList children)
public readonly NonTerminal PBlockDeclaration = new NonTerminal (nameof (PBlockDeclaration), ConcatChildNodes);
public readonly NonTerminal PreBlockDeclaration = new NonTerminal (nameof (PreBlockDeclaration), ConcatChildNodes);
public readonly NonTerminal InlineHyperLinkDeclaration = new NonTerminal (nameof (InlineHyperLinkDeclaration), ConcatChildNodes);
public readonly NonTerminal IgnorableElementDeclaration = new NonTerminal (nameof (IgnorableElementDeclaration), ConcatChildNodes);
public readonly NonTerminal CodeElementDeclaration = new NonTerminal (nameof (CodeElementDeclaration), ConcatChildNodes);

public readonly Terminal InlineHyperLinkOpenTerm = new RegexBasedTerminal ("<a href=", @"(?i)<a\s*href\s*=") {
public readonly Terminal InlineHyperLinkOpenTerm = new RegexBasedTerminal ("<a attr=", @"(?i)<a\s*.*=") {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = "",
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
}
};

// Inline content may contain reserved characters with no tags or special parsing rules, do not throw when encountering them
IgnorableDeclaration.Rule = grammar.ToTerm ("@ ")
| grammar.ToTerm ("{")
| grammar.ToTerm ("}")
;
IgnorableDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
parseNode.AstNode = new XText (parseNode.ChildNodes [0].Term.Name.Trim ());
};

InlineParamDeclaration.Rule = grammar.ToTerm ("{@param") + InlineValue + "}";
InlineParamDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var target = parseNode.ChildNodes [1].AstNode;
Expand Down Expand Up @@ -156,9 +147,38 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
// https://docs.oracle.com/javase/7/docs/technotes/tools/windows/javadoc.html#value
public readonly NonTerminal ValueDeclaration = new NonTerminal (nameof (ValueDeclaration));

public readonly NonTerminal IgnorableDeclaration = new NonTerminal (nameof (IgnorableDeclaration));

public readonly NonTerminal InlineParamDeclaration = new NonTerminal (nameof (InlineParamDeclaration));

// Terminal backed by IgnorableCharTerminal (which matches a single '@', '{' or '}').
// Its node creator stores the matched token's value, converted to a string, as the AST node.
public readonly Terminal IgnorableDeclaration = new IgnorableCharTerminal (nameof (IgnorableDeclaration)) {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = parseNode.Token.Value.ToString (),
},
};

}
}

/// <summary>
/// Terminal that matches exactly one of the characters <c>@</c>, <c>{</c> or <c>}</c>.
/// </summary>
class IgnorableCharTerminal : Terminal
{
	public IgnorableCharTerminal (string name)
		: base (name)
	{
		// One step below TerminalPriority.Low.
		// NOTE(review): presumably so other terminals are preferred over this one -- confirm against the scanner.
		Priority = TerminalPriority.Low - 1;
	}

	/// <summary>
	/// Produces a one-character token when the character at the current source
	/// location is <c>@</c>, <c>{</c> or <c>}</c>; otherwise returns null.
	/// </summary>
	public override Token? TryMatch (ParsingContext context, ISourceStream source)
	{
		var current = source.Text [source.Location.Position];
		switch (current) {
		case '@':
		case '{':
		case '}':
			// Advance the preview position by the single matched character before creating the token.
			source.PreviewPosition += 1;
			return source.CreateToken (OutputTerminal, current);
		default:
			return null;
		}
	}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,27 @@ public void HyperLinkDeclaration ()

r = p.Parse ("<a href=\"AutofillService.html#FieldClassification\">field classification</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("\"AutofillService.html#FieldClassification\"&gt;field classification",
Assert.AreEqual ("field classification", r.Root.AstNode.ToString ());

r = p.Parse ("<a href=https://www.sqlite.org/pragma.html#pragma_journal_mode>here</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("<see href=\"https://www.sqlite.org/pragma.html#pragma_journal_mode\">here</see>", r.Root.AstNode.ToString ());

r = p.Parse ("<a href=\"https://github.com/google/libphonenumber>libphonenumber</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("<see href=\"https://github.com/google/libphonenumber\">libphonenumber</see>", r.Root.AstNode.ToString ());

r = p.Parse ("<a href=#BROKEN> broken</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual (" broken", r.Root.AstNode.ToString ());

r = p.Parse ("<a href=\"mailto:[email protected]\">nobody</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("nobody", r.Root.AstNode.ToString ());

r = p.Parse ("<a href='https://material.io/guidelines/components/progress-activity.html#progress-activity-types-of-indicators'>\nProgress & activity</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ($"<see href=\"https://material.io/guidelines/components/progress-activity.html#progress-activity-types-of-indicators\">{Environment.NewLine}Progress &amp; activity</see>",
r.Root.AstNode.ToString ());
}

Expand Down