diff --git a/metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java b/metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java index eeb1e4397..f80ac407f 100644 --- a/metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java +++ b/metafacture-html/src/main/java/org/metafacture/html/HtmlDecoder.java @@ -98,12 +98,10 @@ private void process(final Element parent, final StreamReceiver receiver) { addedValueAsSubfield = handleAttributeValuesAsSubfields(receiver, element, attributes, attribute); receiver.literal(attribute.getKey(), attribute.getValue()); } - if (element.children().isEmpty()) { - final String text = element.text().trim(); - final String value = text.isEmpty() ? element.data() : text; - if (!value.isEmpty() && !addedValueAsSubfield) { - receiver.literal("value", value); - } + final String text = element.text().trim(); + final String value = text.isEmpty() ? element.data() : text; + if (!value.isEmpty() && !addedValueAsSubfield) { + receiver.literal("value", value); } process(element, receiver); receiver.endEntity(); @@ -133,7 +131,7 @@ private boolean handleAttributeValuesAsSubfields(final StreamReceiver receiver, * @param mapString the attributes to be added as subfields */ public void setAttrValsAsSubfields(final String mapString) { - this.attrValsAsSubfields = new HashMap(); + this.attrValsAsSubfields = new HashMap<>(); final String input = mapString.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + mapString : mapString; for (final String nameValuePair : input.split("&")) { final String[] nameValue = nameValuePair.split("="); diff --git a/metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java b/metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java index 66d737875..fa70245dd 100644 --- a/metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java +++ b/metafacture-html/src/test/java/org/metafacture/html/HtmlDecoderTest.java @@ -77,6 +77,17 @@ public void nestedEntities() { } + @Test + public void mixedContent() { + htmlDecoder.process(new StringReader("

This is the full text

")); + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startEntity("p"); + ordered.verify(receiver).literal("value", "This is the full text"); + // elements above plus body, html + ordered.verify(receiver, times(4)).endEntity(); + + } + @Test public void htmlAttributesAsLiterals() { htmlDecoder.process(new StringReader("

Text"));