diff --git a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs index f17b63600..475da73b5 100644 --- a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs +++ b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs @@ -25,7 +25,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) AllHtmlTerms.Rule = TopLevelInlineDeclaration | PBlockDeclaration | PreBlockDeclaration - | IgnorableElementDeclaration ; var inlineDeclaration = new NonTerminal ("", ConcatChildNodes) { @@ -102,41 +101,36 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) InlineHyperLinkDeclaration.Rule = InlineHyperLinkOpenTerm + InlineDeclarations + CreateEndElement ("a", grammar, optional: true); InlineHyperLinkDeclaration.AstConfig.NodeCreator = (context, parseNode) => { - var unparsedAElementValue = string.Empty; - foreach (var cn in parseNode.ChildNodes) { - if (cn.ChildNodes?.Count > 1) { - foreach (var gcn in cn.ChildNodes) { - unparsedAElementValue += gcn.AstNode?.ToString (); - } - } else { - unparsedAElementValue += cn.AstNode?.ToString (); - } - } + var nodesAsString = GetChildNodesAsString (parseNode); + var tokenValue = parseNode.ChildNodes [0].Token.Text; + int stopIndex = nodesAsString.IndexOf ('>'); - var seeElement = TryParseHRef (unparsedAElementValue); - if (seeElement == null) - seeElement = TryParseHRef (WebUtility.HtmlDecode (unparsedAElementValue), logError: true); + if (stopIndex == -1 || !tokenValue.Contains ("href", StringComparison.OrdinalIgnoreCase)) { + parseNode.AstNode = new XText (nodesAsString); + return; + } - var hrefValue = seeElement?.Attribute ("href")?.Value ?? string.Empty; - if (!string.IsNullOrEmpty (hrefValue) && - (hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) { - parseNode.AstNode = seeElement; + var attributeName = parseNode.ChildNodes [0].Term.Name; + var attributeValue = nodesAsString.Substring (0, stopIndex).Trim ().Trim ('\'', '"'); + var elementValue = nodesAsString.Substring (stopIndex + 1); + if (!string.IsNullOrEmpty (attributeValue) && attributeValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase)) { + var unparsed = $"{elementValue}"; + XNode? seeElement = TryParseElement (unparsed); + if (seeElement == null) { + // Try to parse with HTML entities decoded + seeElement = TryParseElement (WebUtility.HtmlDecode (unparsed)); + if (seeElement == null) { + // Finally, try to parse with only the element value encoded + seeElement = TryParseElement ($"{WebUtility.HtmlEncode (elementValue)}", logError: true); + } + } + parseNode.AstNode = seeElement ?? new XText (nodesAsString); } else { // TODO: Need to convert relative paths or code references to appropriate CREF value. - parseNode.AstNode = new XText (unparsedAElementValue); + parseNode.AstNode = new XText (elementValue); } }; - // Start to trim out unusable HTML elements/tags, but not any inner values - IgnorableElementDeclaration.Rule = - CreateStartElementIgnoreAttribute ("a", "name") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true) - | CreateStartElementIgnoreAttribute ("a", "id") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true) - ; - IgnorableElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => { - var aElementValue = new XText (parseNode.ChildNodes [1].AstNode.ToString () ?? string.Empty); - parseNode.AstNode = aElementValue; - }; - CodeElementDeclaration.Rule = CreateStartElement ("code", grammar) + InlineDeclarations + CreateEndElement ("code", grammar); CodeElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => { var target = parseNode.ChildNodes [1].AstNode; @@ -184,13 +178,28 @@ static IEnumerable GetParagraphs (ParseTreeNodeList children) } } - static XElement? TryParseHRef (string unparsedAElementValue, bool logError = false) + static string GetChildNodesAsString (ParseTreeNode parseNode) + { + var unparsed = string.Empty; + foreach (var cn in parseNode.ChildNodes) { + if (cn.ChildNodes?.Count > 1) { + foreach (var gcn in cn.ChildNodes) { + unparsed += gcn.AstNode?.ToString (); + } + } else { + unparsed += cn.AstNode?.ToString (); + } + } + return unparsed; + } + + static XElement? TryParseElement (string unparsed, bool logError = false) { try { - return XElement.Parse ($""); + return XElement.Parse (unparsed); } catch (Exception x) { if (logError) - Console.Error.WriteLine ($"## Unable to parse HTML element: \n{x.GetType ()}: {x.Message}"); + Console.Error.WriteLine ($"## Unable to parse HTML element: `{unparsed}`\n{x.GetType ()}: {x.Message}"); return null; } } @@ -221,10 +230,9 @@ static IEnumerable GetParagraphs (ParseTreeNodeList children) public readonly NonTerminal PBlockDeclaration = new NonTerminal (nameof (PBlockDeclaration), ConcatChildNodes); public readonly NonTerminal PreBlockDeclaration = new NonTerminal (nameof (PreBlockDeclaration), ConcatChildNodes); public readonly NonTerminal InlineHyperLinkDeclaration = new NonTerminal (nameof (InlineHyperLinkDeclaration), ConcatChildNodes); - public readonly NonTerminal IgnorableElementDeclaration = new NonTerminal (nameof (IgnorableElementDeclaration), ConcatChildNodes); public readonly NonTerminal CodeElementDeclaration = new NonTerminal (nameof (CodeElementDeclaration), ConcatChildNodes); - public readonly Terminal InlineHyperLinkOpenTerm = new RegexBasedTerminal (" { - parseNode.AstNode = new XText (parseNode.ChildNodes [0].Term.Name.Trim ()); - }; - InlineParamDeclaration.Rule = grammar.ToTerm ("{@param") + InlineValue + "}"; InlineParamDeclaration.AstConfig.NodeCreator = (context, parseNode) => { var target = parseNode.ChildNodes [1].AstNode; @@ -156,9 +147,38 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) // https://docs.oracle.com/javase/7/docs/technotes/tools/windows/javadoc.html#value public readonly NonTerminal ValueDeclaration = new NonTerminal (nameof (ValueDeclaration)); - public readonly NonTerminal IgnorableDeclaration = new NonTerminal (nameof (IgnorableDeclaration)); - public readonly NonTerminal InlineParamDeclaration = new NonTerminal (nameof (InlineParamDeclaration)); + + public readonly Terminal IgnorableDeclaration = new IgnorableCharTerminal (nameof (IgnorableDeclaration)) { + AstConfig = new AstNodeConfig { + NodeCreator = (context, parseNode) => parseNode.AstNode = parseNode.Token.Value.ToString (), + }, + }; + } } + + class IgnorableCharTerminal : Terminal + { + public IgnorableCharTerminal (string name) + : base (name) + { + Priority = TerminalPriority.Low - 1; + } + + public override Token? TryMatch (ParsingContext context, ISourceStream source) + { + var startChar = source.Text [source.Location.Position]; + if (startChar != '@' + && startChar != '{' + && startChar != '}' + ) { + return null; + } + source.PreviewPosition += 1; + return source.CreateToken (OutputTerminal, startChar); + } + + } + } diff --git a/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs b/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs index eaef58232..e699d9c1a 100644 --- a/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs +++ b/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs @@ -69,7 +69,27 @@ public void HyperLinkDeclaration () r = p.Parse ("field classification"); Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); - Assert.AreEqual ("\"AutofillService.html#FieldClassification\">field classification", + Assert.AreEqual ("field classification", r.Root.AstNode.ToString ()); + + r = p.Parse ("here"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual ("here", r.Root.AstNode.ToString ()); + + r = p.Parse ("libphonenumber"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual ("libphonenumber", r.Root.AstNode.ToString ()); + + r = p.Parse (" broken"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual (" broken", r.Root.AstNode.ToString ()); + + r = p.Parse ("nobody"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual ("nobody", r.Root.AstNode.ToString ()); + + r = p.Parse ("\nProgress & activity"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual ($"{Environment.NewLine}Progress & activity", r.Root.AstNode.ToString ()); }