From 334e5ce36f38012b98e659313c135b7520101aec Mon Sep 17 00:00:00 2001 From: Jeremy Geros Date: Thu, 11 Jun 2015 09:00:59 +1000 Subject: [PATCH] Pre parse html with nokogiri to fix unclosed tags --- html2confluence.gemspec | 4 +++- lib/html2confluence.rb | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/html2confluence.gemspec b/html2confluence.gemspec index 5ae0c53..09a7ddc 100644 --- a/html2confluence.gemspec +++ b/html2confluence.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.platform = Gem::Platform::RUBY s.name = 'html2confluence' - s.version = "1.3.8" + s.version = "1.3.9" s.summary = 'Converter from HTML to Confluence Wiki Markup' s.description = 'Provides an SGML parser to convert HTML into the Wiki Markup format' @@ -13,4 +13,6 @@ Gem::Specification.new do |s| s.require_path = 'lib' s.files = Dir.glob("{lib,spec}/**/*") + %w(example.rb README.mdown) + + s.add_dependency "nokogiri" end diff --git a/lib/html2confluence.rb b/lib/html2confluence.rb index 1bfd178..a77ac41 100644 --- a/lib/html2confluence.rb +++ b/lib/html2confluence.rb @@ -1,5 +1,7 @@ require 'rexml/document' +require 'nokogiri' # For validating html from our editor + # A class to convert HTML to textile. Based on the python parser # found at http://aftnn.org/content/code/html2textile/ # @@ -406,6 +408,14 @@ def preprocess(data) # Fix unclosed data.gsub!(/(]+)(?/, '\1 />') + + # Parse with nokogiri to ensure no tags are left unclosed + # Ensure a parsing error from Nokogiri can't stop processing, so that REXML can report a better error + begin + validated_data = Nokogiri::HTML::fragment(data).to_xml + data = validated_data + rescue Exception => e + end data end