diff --git a/html2confluence.gemspec b/html2confluence.gemspec
index 5ae0c53..09a7ddc 100644
--- a/html2confluence.gemspec
+++ b/html2confluence.gemspec
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|
s.platform = Gem::Platform::RUBY
s.name = 'html2confluence'
- s.version = "1.3.8"
+ s.version = "1.3.9"
s.summary = 'Converter from HTML to Confluence Wiki Markup'
s.description = 'Provides an SGML parser to convert HTML into the Wiki Markup format'
@@ -13,4 +13,6 @@ Gem::Specification.new do |s|
s.require_path = 'lib'
s.files = Dir.glob("{lib,spec}/**/*") + %w(example.rb README.mdown)
+
+ s.add_dependency "nokogiri"
end
diff --git a/lib/html2confluence.rb b/lib/html2confluence.rb
index 1bfd178..a77ac41 100644
--- a/lib/html2confluence.rb
+++ b/lib/html2confluence.rb
@@ -1,5 +1,7 @@
require 'rexml/document'
+require 'nokogiri' # For validating html from our editor
+
# A class to convert HTML to textile. Based on the python parser
# found at http://aftnn.org/content/code/html2textile/
#
@@ -406,6 +408,14 @@ def preprocess(data)
# Fix unclosed
data.gsub!(/(
]+)(?/, '\1 />')
+
+ # Parse with Nokogiri to ensure no tags are left unclosed
+ # Deliberately swallow any Nokogiri parsing error so processing continues and REXML can report a better error
+ begin
+ validated_data = Nokogiri::HTML::fragment(data).to_xml
+ data = validated_data
+ rescue Exception => e
+ end
data
end