diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..3c956bc --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +source 'https://rubygems.org' + +gemspec + +gem 'pry'; require 'pry' +gem 'clipboard'; require 'clipboard' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..ca17bc5 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,46 @@ +PATH + remote: . + specs: + html2confluence (1.3.23) + nokogiri + +GEM + remote: https://rubygems.org/ + specs: + clipboard (1.1.1) + coderay (1.1.1) + diff-lcs (1.3) + method_source (0.8.2) + mini_portile2 (2.2.0) + nokogiri (1.8.0) + mini_portile2 (~> 2.2.0) + pry (0.10.4) + coderay (~> 1.1.0) + method_source (~> 0.8.1) + slop (~> 3.4) + rspec (3.6.0) + rspec-core (~> 3.6.0) + rspec-expectations (~> 3.6.0) + rspec-mocks (~> 3.6.0) + rspec-core (3.6.0) + rspec-support (~> 3.6.0) + rspec-expectations (3.6.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.6.0) + rspec-mocks (3.6.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.6.0) + rspec-support (3.6.0) + slop (3.6.0) + +PLATFORMS + ruby + +DEPENDENCIES + clipboard + html2confluence! + pry + rspec + +BUNDLED WITH + 1.13.7 diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..86dacca --- /dev/null +++ b/Rakefile @@ -0,0 +1,23 @@ +require 'bundler/gem_tasks' +require 'rake/testtask' + +Rake::TestTask.new do |t| + t.libs << 'test' + t.test_files = Dir['test/**/*_test.rb'] +end +task default: :test + +desc "Open a pry console preloaded with this library" +task console: 'console:pry' + +namespace :console do + + task :pry do + sh "bundle exec pry -I lib -r html2confluence.rb" + end + + task :irb do + sh "bundle exec irb -I lib -r html2confluence.rb" + end + +end diff --git a/html2confluence.gemspec b/html2confluence.gemspec index 1aba92d..6391d45 100644 --- a/html2confluence.gemspec +++ b/html2confluence.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.platform = Gem::Platform::RUBY s.name = 'html2confluence' - s.version = "1.3.22" + s.version = "1.3.23" s.summary = 'Converter from HTML to Confluence Wiki Markup' s.description = 'Provides an SGML parser to convert HTML into the Wiki Markup format' @@ -13,6 +13,7 @@ Gem::Specification.new do |s| s.require_path = 'lib' s.files = Dir.glob("{lib,spec}/**/*") + %w(example.rb README.mdown) - + s.add_dependency "nokogiri" + s.add_development_dependency "rspec" end diff --git a/lib/html2confluence.rb b/lib/html2confluence.rb index cc9815d..2a30349 100644 --- a/lib/html2confluence.rb +++ b/lib/html2confluence.rb @@ -1,5 +1,7 @@ require 'rexml/document' +require 'walker' + require 'nokogiri' # For validating html from our editor # A class to convert HTML to confluence markup. Based on the python parser @@ -74,7 +76,6 @@ def make_quicktag_start_pair(tag, wrapchar, attributes) def make_quicktag_end_pair(wrapchar) content = stop_capture - # Don't make quicktags with empty content. if content.join("").strip.empty? write(content) @@ -85,15 +86,25 @@ def make_quicktag_end_pair(wrapchar) unless in_nested_quicktag? #write([" "]) end - write(["#{wrapchar}"]) + write_quicktag_wrapchar(wrapchar) end - write(content.collect(&:strip)) - write([wrapchar]) unless @skip_quicktag + write(content) + write_quicktag_wrapchar(wrapchar) unless @skip_quicktag unless in_nested_quicktag? #write([" "]) end end + def write_quicktag_wrapchar(wrapchar) + write([ + if @last_write[-1] =~ /\s/ + "#{wrapchar}" + else + "{#{wrapchar}}" + end + ]) + end + def in_nested_quicktag? @quicktags ||= QUICKTAGS.keys @stack.size > 1 && @quicktags.include?(@stack[@stack.size-1]) && @quicktags.include?(@stack[@stack.size-2]) @@ -147,25 +158,25 @@ def handle_data(data) PAIRS = { 'bq' => 'bq', 'p' => 'p' } QUICKTAGS = { 'b' => '*', 'strong' => '*', 'del' => '-', 'strike' => '-', 'i' => '_', 'ins' => '+', 'u' => '+', 'em' => '_', 'cite' => '??', - 'sup' => '^', 'sub' => '~', 'code' => '@'} + 'sup' => '^', 'sub' => '~'} - PAIRS.each do |key, value| - define_method "start_#{key}" do |attributes| - make_block_start_pair(value, attributes) + PAIRS.each do |node, markup| + define_method "start_#{node}" do |attributes| + make_block_start_pair(markup, attributes) end - define_method "end_#{key}" do + define_method "end_#{node}" do make_block_end_pair end end - QUICKTAGS.each do |key, value| - define_method "start_#{key}" do |attributes| - make_quicktag_start_pair(key, value, attributes) + QUICKTAGS.each do |node, wrapchar| + define_method "start_#{node}" do |attributes| + make_quicktag_start_pair(node, wrapchar, attributes) end - define_method "end_#{key}" do - make_quicktag_end_pair(value) + define_method "end_#{node}" do + make_quicktag_end_pair(wrapchar) end end @@ -363,6 +374,17 @@ def end_blockquote write(s) end end + + def start_code(attrs) + @preserveWhitespace = true + write("{code}") + end + + def end_code + stop_capture_and_write + write("{code}") + @preserveWhitespace = false + end def start_pre(attrs) @preserveWhitespace = true @@ -392,7 +414,7 @@ def preprocess(data) data.gsub!(/&(mdash|#8212);/,'---') data.gsub!(/&(ndash|#8211);/,'--') - # remove empty blockquotes and list items (other empty elements are easy enough to deal with) + # remove empty blockquotes (other empty elements are easy enough to deal with) data.gsub!(/
\s*(]*>)?\s*<\/blockquote>/x,' ') # Fix unclosed
@@ -443,16 +465,15 @@ def preprocess(data) # Return the textile after processing def to_wiki_markup - fix_textile_whitespace!(result.join).gsub(/\n(\*|#)+\s*\n(\*|#)+/) do |match| - "\n#{match.split("\n").last.squeeze(' ')}" - end + fix_textile_whitespace!(result.join) end def fix_textile_whitespace!(output) - # fixes multiple blank lines, blockquote indicator followed by blank lines, and trailing whitespace after quicktags - # modifies input string and also returns it + # fixes multiple blank lines output.gsub!(/(\n\s*){2,}/,"\n\n") + # fixes blockquote indicator followed by blank lines output.gsub!(/bq. \n+(\w)/,'bq. \1') + # fixes trailing whitespace after quicktags QUICKTAGS.values.uniq.each do |t| output.gsub!(/ #{Regexp.escape(t)}[ \t]+#{Regexp.escape(t)} /,' ') # removes empty quicktags #output.gsub!(/(\[?#{Regexp.escape(t)})(\w+)([^#{Regexp.escape(t)}]+)(\s+)(#{Regexp.escape(t)}\]?)/,'\1\2\3\5\4') # fixes trailing whitespace before closing quicktags @@ -461,6 +482,14 @@ def fix_textile_whitespace!(output) #output.gsub!(/^[ \t]/,'') # leading whitespace #output.gsub!(/[ \t]$/,'') # trailing whitespace output.strip! + # fixes extra bullets generated when nesting list items + output.gsub!(/\n([\*|#]+)\s*\n([\*|#]+)/) do |match| + if $1 == $2 + match + else + "\n#{match.split("\n").last}" + end.squeeze(' ') + end return output end diff --git a/lib/walker.rb b/lib/walker.rb new file mode 100644 index 0000000..24d379c --- /dev/null +++ b/lib/walker.rb @@ -0,0 +1,316 @@ +class Walker + + NBSP = (0xC2.chr + 0xA0.chr).force_encoding(Encoding::UTF_8).freeze + HORIZONTAL_SPACE = /[\t\p{Zs}]/ + + def initialize(html) + @source = html + @parsed = Nokogiri::HTML.fragment(@source) + end + + def convert + postprocess preprocess(@parsed).accept(self) + end + + def preprocess(string) + string + # .gsub(/&(mdash|#8212);/, '---') + # .gsub(/&(ndash|#8211);/, '--') + end + + def postprocess(string) + # string.gsub(/\n+/, "\n").gsub(/#{HORIZONTAL_SPACE}+/, ' ') + # .gsub(/\n+/, "\n") + # .gsub(/#{HORIZONTAL_SPACE}+/, ' ') + string + .gsub(NBSP, ' ') + .gsub("\r", "\n") + .gsub(/&(mdash|#8212);/, '---') + .gsub(/&(ndash|#8211);/, '--') + end + + def visit(node) + send(:"handle_#{node.name}", node) + end + + def handle(nodes) + Array(nodes).reject do |node| + node.text? and node.content.gsub(/[\s\n]+/, '').empty? + end.map do |node| + [node, visit(node)] + end.map do |node, result| + if block_given? + yield node, result + else + result + end + end.flatten.join + end + + def respond_to_missing?(method) + method.starts_with?(:handle_) or super + end + + def method_missing(method, *args, &block) + if method.to_s.start_with?("handle_") and args.length == 1 + node, *_ = args + if node.text? + node.content + else + handle node.children + end + else; super; end + end + + def handle_text(node) + node.content + .gsub(/&(mdash|#8212);/, '---') + .gsub(/&(ndash|#8211);/, '--') + end + +# Node handlers + + def handle_#document-fragment(node) + handle(node.children) + end + + FORMAT_TAGS = { + [:b, :strong] => '*', + [:i, :em] => '_', + [:del, :strike] => '-', + [:u, :ins] => '+', + cite: '??', + sup: '^', + sub: '~' + } + + FORMAT_TAGS.each do |tags, markup| + Array(tags).each do |tag| + define_method :"handle_#{tag}" do |node| + if contentless? node + [] + else + start = if not node.previous or node.previous.name == "br" or (node.previous.text? and node.previous.content =~ /\W\Z/) + markup + else + ["{", markup, "}"] + end + close = if not node.next or node.next.name == "br" or (node.next.text? and node.next.content =~ /\A\W/) + markup + else + ["{", markup, "}"] + end + [start, handle(node.children), close] + end + end + end + end + + SECTION_TAGS = (1..6).map{|n| "h#{n}"} << "bq" + + SECTION_TAGS.each do |tag| + define_method :"handle_#{tag}" do |node| + # require 'pry'; Pry.config.input = STDIN; Pry.config.output = STDOUT; binding.pry + # ["\n\n", tag, ".", " ", handle(node.children), "\n\n"] + [tag, ".", " ", handle(node.children), "\n"] + end + end + + CONTENT_TAGS = [:p, :div] + + CONTENT_TAGS.each do |tag| + define_method :"handle_#{tag}" do |node| + # ["\n\n", handle(node.children), "\n\n"] + # require 'pry'; Pry.config.input = STDIN; Pry.config.output = STDOUT; binding.pry + if contentless? node + [] + else + [handle(node.children), "\n"] + end + end + end + + def contentless?(node) + contentless = [] + node.traverse do |child| + next if child == node + contentless << if child.text? + child.content.gsub(/\s\n/, '').empty? + elsif child.name == "br" + true + else + contentless? child + end + end + contentless.all? + end + + LIST_TAGS = [:ul, :ol] + + LIST_TAGS.each do |tag| + define_method :"handle_#{tag}" do |node| + if node.ancestors.any?{ |n| ["ul", "ol"].include? n.name } + handle(node.children) + else + ["\n", handle(node.children), "\n"] + end + # bullets = node.ancestors('ol,ul').to_a.unshift(node).reverse.map do |list| + # bullet_for list + # end + # handle(node.elements) do |node, result| + # [bullets, " ", result] + # end + end + end + + def handle_li(node) + if node.elements.length == 1 and ["ul", "ol"].include? node.elements.first.name + handle(node.children) + else + bullets = node.ancestors.select do |parent| + ["ol", "ul"].include? parent.name + end.reverse.map do |list| + bullet_for list + end + [bullets, " ", handle(node.children), "\n"] + end + end + + def bullet_for(list) + if list&.name == "ol" + "#" + elsif list&.attr("type") == "square" + "-" + else + "*" + end + end + + def handle_table(node) + ["\n", handle(node.children), "\n"] + end + + def handle_thead(node) + handle(node.children) + end + + def handle_tbody(node) + handle(node.children) + end + + def handle_tfooter(node) + handle(node.children) + end + + def handle_tr(node) + table_segment = node.ancestors.find do |parent| + ["thead", "tbody", "tfooter"].include? parent.name + end + has_header = node.elements.map(&:name).uniq.include? "th" + seperator = (table_segment&.name == "thead" or has_header) ? "||" : "|" + [seperator, handle(node.children), "\n"] + end + + def handle_th(node) + [handle(node.children).strip, " ", "||"] + end + + def handle_td(node) + [handle(node.children).strip, " ", "|"] + end + + def handle_blockquote(node) + shorthand = catch(:shorthand) do + node.traverse do |child| + next if child == node + # Check for multiline blockquotes + if child.text? and child.content =~ /\n/ + throw :shorthand, false + end + # Check for nested blockquotes + if child.name == "blockquote" + throw :shorthand, false + end + end + throw :shorthand, true + end + + if shorthand + ["bq.", " ", handle(node.children)] + else + ["\n", "{quote}", "\n", handle(node.children), "\n", "{quote}", "\n"] + end + end + + def handle_code(node) + ["{code}", handle(node.children), "{code}"] + end + + def handle_pre(node) + ["{noformat}", handle(node.children), "{noformat}"] + end + + def handle_a(node) + link = node["href"].to_s[1..-1] + if link + ["[", handle(node.children), link, "]"] + else + handle(node.children) + end + end + + def handle_font(node) + ["{color:#{node["color"]}}", handle(node.children), "{color}"] + end + + EMOJI = { + smile: ":)", + sad: ":(", + tongue: ":P", + biggrin: ":D", + wink: ";)", + thumbs_up: "(y)", + thumbs_down: "(n)", + information: "(i)", + check: "(/)", + error: "(x)", + warning: "(!)", + add: "(+)", + forbidden: "(-)", + help_16: "(?)", + lightbulb_on: "(on)", + lightbulb: "(off)", + star_yellow: "(*)", + star_red: "(*r)", + star_green: "(*g)", + star_blue: "(*b)", + star_yellow: "(*y)", + } + EMOJI_SRC = /([\w.-_:\/]+|\/)images\/icons\/emoticons\/(?#{EMOJI.keys.map(&:to_s).join("|")})\.(gif|png)/ + + def handle_img(node) + if src = node["src"] + if emoji = src[EMOJI_SRC, "emoji"] + EMOJI[emoji.to_sym] + else + [" ", "!", src, "!", " "] + end + end + end + + def handle_caption(node) + ["\n", handle(node.children), "\n"] + end + + def handle_tt(node) + ["{{", handle(node.children), "}}"] + end + + def handle_br(node) + ["\n"] + end + + def handle_hr(node) + ["----"] + end +end diff --git a/spec/combination_examples_spec.rb b/spec/combination_examples_spec.rb index f1ef51e..ff621ba 100644 --- a/spec/combination_examples_spec.rb +++ b/spec/combination_examples_spec.rb @@ -1,144 +1,131 @@ -# encoding: utf-8 -$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') -require 'html2confluence' +require_relative 'spec_helper' describe HTMLToConfluenceParser, "when running combination examples" do it "should match complex examples" do - html = <<-END -
    -
  1. a
  2. -
  3. numbered item that is underlined.
  4. -
  5. list
  6. -
- END + html = <<~HTML +
    +
  1. a
  2. +
  3. numbered item that is underlined.
  4. +
  5. list
  6. +
+ HTML - markup = <<-END -# a -# numbered *item* that is +underlined+. -# list - END - - - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + markup = <<~MARKUP + # a + # numbered *item* that is +underlined+. + # list + MARKUP + + expect(html).to match_markup(markup) end it "should match nested lists" do - html = <<-END -

One line

-
    -
  • Nested
  • -
  • -
      -
    1. bullets
    2. -
    3. go
    4. -
    5. here
    6. -
    7. -
        -
      1. dfsdf
      2. -
      3. dsfs
      4. -
      -
    8. -
    -
  • -
  • Final bullet
  • -
- -

More stuff too

- -
    -
  • In
  • -
  • -
      -
    • and
    • -
    -
  • -
  • out
  • -
  • -
      -
    1. with numbers
    2. -
    3. -
        -
      • and sub-bullets
      • -
      -
    4. -
    -
  • -
  • and back out
  • -
- -

With nice formatting.

- END - - markup = <<-END -One line - -* Nested -*# bullets -*# go -*# here -*## dfsdf -*## dsfs -* Final bullet - -More stuff too - -* In -** and -* out -*# with numbers -*#* and sub-bullets -* and back out - -h1. With +nice+ formatting. - END - - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + html = <<~HTML +

One line

+
    +
  • Nested
  • +
  • +
      +
    1. bullets
    2. +
    3. go
    4. +
    5. here
    6. +
    7. +
        +
      1. dfsdf
      2. +
      3. dsfs
      4. +
      +
    8. +
    +
  • +
  • Final bullet
  • +
+ +

More stuff too

+ +
    +
  • In
  • +
  • +
      +
    • and
    • +
    +
  • +
  • out
  • +
  • +
      +
    1. with numbers
    2. +
    3. +
        +
      • and sub-bullets
      • +
      +
    4. +
    +
  • +
  • and back out
  • +
+ +

With nice formatting.

+ HTML + + markup = <<~MARKUP + One line + + * Nested + *# bullets + *# go + *# here + *## dfsdf + *## dsfs + * Final bullet + + More stuff too + + * In + ** and + * out + *# with numbers + *#* and sub-bullets + * and back out + + h1. With +nice+ formatting. + MARKUP + + expect(html).to match_markup(markup) end it "should match nested blockquotes" do - html = <<-END -
content here
- END + html = <<~HTML +
content here
+ HTML - markup = <<-END -{quote}\nbq. content here\n{quote} - END + markup = <<~MARKUP + {quote}\nbq. content here\n{quote} + MARKUP - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + expect(html).to match_markup(markup) end it "should handle empty paragraphs" do - html = <<-END -

Previous


Scenario 4a: Existing deletes their ID
- Given I am an existing user

- END + html = <<~HTML +

Previous


Scenario 4a: Existing deletes their ID
+ Given I am an existing user

+ HTML markup = "Previous\n\n*Scenario 4a: Existing deletes their ID*\n*Given* I am an existing user" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "should handle empty bold sections" do - html = <<-END -

Previous line

-


Scenario 4a: Existing deletes their ID
- Given I am an existing user

- END + html = <<~HTML +

Previous line

+


Scenario 4a: Existing deletes their ID
+ Given I am an existing user

+ HTML markup = "Previous line\n\n*Scenario 4a: Existing deletes their ID*\n*Given* I am an existing user" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "doesn't remove extra newlines" do @@ -146,9 +133,7 @@ markup = "*And* first line\n\n*second line*" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "handles unclosed img tags" do @@ -156,60 +141,47 @@ markup = "!a source!" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "handles wbr tags" do html = "
familiar with the XMLHttpRequest Object
\n\n" markup = "familiar with the XMLHttpRequest Object" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "should handle unclosed tags" do - html = <<-END -

Previous line

-
- END + html = <<~HTML +

Previous line

+
+ HTML markup = "Previous line\n\n----" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "should handle HTML comments" do - html = <<-END -

A

- END + html = <<~HTML +

A

+ HTML markup = "A" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end it "should handle CDATA elements" do - html = <<-END -

A

- - END + html = <<~HTML +

A

+ + HTML markup = "A" - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to eq(markup) + expect(html).to match_markup(markup) end end - - - diff --git a/spec/complex_tables_spec.rb b/spec/complex_tables_spec.rb index 1c8fa12..05982bd 100644 --- a/spec/complex_tables_spec.rb +++ b/spec/complex_tables_spec.rb @@ -1,90 +1,77 @@ -# encoding: utf-8 -$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') -require 'html2confluence' +require_relative 'spec_helper' describe HTMLToConfluenceParser, "when running complex tables examples" do it "should handle table with newlines" do - html = <<-END -
As a...I would like...Because...

Student
or

Teacher

There to be more candy

Candy is:

  • Delicious
  • Shiny
  • Good for my teeth
- END + html = <<~HTML +
As a...I would like...Because...

Student
or

Teacher

There to be more candy

Candy is:

  • Delicious
  • Shiny
  • Good for my teeth
+ HTML - markup = <<-END -|As a...|I would like...|Because...| -|Student -or -\\\\ -Teacher|There to be more candy|Candy is: -\\\\ -* Delicious -* Shiny -* Good for my teeth| - END + markup = <<~MARKUP + |As a...|I would like...|Because...| + |Student + or + \\\\ + Teacher|There to be more candy|Candy is: + \\\\ + * Delicious + * Shiny + * Good for my teeth| + MARKUP - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + expect(html).to match_markup(markup) end it "should handle table empty cells" do - html = <<-END -


Empty


- END + html = <<~HTML +


Empty


+ HTML - markup = <<-END -| |Empty| | - END + markup = <<~MARKUP + | |Empty| | + MARKUP - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + expect(html).to match_markup(markup) end it "should handle pre in table empty cells" do - html = <<-END -
a
d
b
c
- END + html = <<~HTML +
a
d
b
c
+ HTML - markup = <<-END -|{noformat} -a{noformat} |d | -|{noformat} -b{noformat} |c | - END + markup = <<~MARKUP + |{noformat} + a{noformat} |d | + |{noformat} + b{noformat} |c | + MARKUP - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + expect(html).to match_markup(markup) end it "should handle pre in table" do - html = <<-END - - - - - - - - - - - -
A B C
1
2
3
- END + html = <<~HTML + + + + + + + + + + + +
A B C
1
2
3
+ HTML - markup = <<-END -|A |{{B}} |C | -|1 |{noformat} -2{noformat} |3 | - END + markup = <<~MARKUP + |A |{{B}} |C | + |1 |{noformat} + 2{noformat} |3 | + MARKUP - parser = HTMLToConfluenceParser.new - parser.feed(html) - expect(parser.to_wiki_markup.strip).to include(markup.strip) + expect(html).to match_markup(markup) end end - - - diff --git a/spec/html2confluence_spec.rb b/spec/html2confluence_spec.rb index 53f93fc..22d3fcf 100644 --- a/spec/html2confluence_spec.rb +++ b/spec/html2confluence_spec.rb @@ -1,139 +1,133 @@ -# encoding: utf-8 -$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') -require 'html2confluence' -#require 'redcloth' +require 'spec_helper' describe HTMLToConfluenceParser, "when converting html to textile" do - before :all do - html = <<-END -
- - Some text inside a div
with two lines -

- Converting HTML to Textile with Ruby - Converting HTML to Textile with Ruby -

- -
A note
Followed by another note
- -

- 23 November 2007 - (7:51 pm) -

- -

- By James Stewart
filed under: - Snippets -
tagged: , - , - , - , - , - -

- -

test paragraph without id or class attributes

- -

test paragraph without closing tag

- -

Break not closed
at all

- -
  • test invalid list item 1
  • -
  • test invalid list item 2
  • - -
      -
    1. test 1
    2. -
    3. test 2
      with a line break in the middle
    4. -
    5. test 3
    6. -

    7. -
    - - x> y - -
    -

    paragraph inside a blockquote

    -

    another paragraph inside a blockquote

    -
    - -

    - Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. - Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure - dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non - proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

    - - - - - - - - - - -
    table caption
    heading 1heading 2
    value 1value 2
    - - Hughes & Hughes - - Something & something else and a useless span - - Some text before a table - - - - - - - - -
    heading 1heading 2
    value 1value 2
    - -

    - Please apply online at:
    www.something.co.uk/careers

    - -

    test test emphasised bold text test - An ordinal number - 1st -

    + let :html do + <<~HTML +
    + + Some text inside a div
    with two lines +

    + Converting HTML to Textile with Ruby + Converting HTML to Textile with Ruby +

    + +
    A note
    Followed by another note
    + +

    + 23 November 2007 + (7:51 pm) +

    + +

    + By James Stewart
    filed under: + Snippets +
    tagged: , + , + , + , + , + +

    + +

    test paragraph without id or class attributes

    + +

    test paragraph without closing tag

    + +

    Break not closed
    at all

    + +
  • test invalid list item 1
  • +
  • test invalid list item 2
  • + +
      +
    1. test 1
    2. +
    3. test 2
      with a line break in the middle
    4. +
    5. test 3
    6. +

    7. +
    + + x> y + +
    +

    paragraph inside a blockquote

    +

    another paragraph inside a blockquote

    +
    + +

    + Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure + dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

    + + + + + + + + + + +
    table caption
    heading 1heading 2
    value 1value 2
    + + Hughes & Hughes + + Something & something else and a useless span + + Some text before a table + + + + + + + + +
    heading 1heading 2
    value 1value 2
    + +

    + Please apply online at:
    www.something.co.uk/careers

    + +

    test test emphasised bold text test + An ordinal number - 1st +

    - - -

     

    -
     
    - more bold text
    + + +

     

    +
     
    + more bold text
    -

    Some text with underlining is here.

    +

    Some text with underlining is here.

    -

    Æïœü

    +

    Æïœü

    + + some_good_code - © Copyright statement, let's see what happens to this… € 100 + © Copyright statement, let's see what happens to this… € 100 - An unknown named entity reference - &unknownref; + An unknown named entity reference - &unknownref; - strike 1 - strike 2 + strike 1 + strike 2 - # Not a list - * Not a list - - Not a list - *Not bold* - _Not a emph_ - {Not curly} - |Not table -
    - END - parser = HTMLToConfluenceParser.new - parser.feed(html) - @textile = parser.to_wiki_markup - #puts @textile - #puts RedCloth.new(@textile).to_html + # Not a list + * Not a list + - Not a list + *Not bold* + _Not a emph_ + {Not curly} + |Not table +
    + HTML end it "should convert heading tags" do - expect(@textile).to match(/^h1(\([^\)]+\))?\./) + expect(html).to match_markup(/^h1(\([^\)]+\))?\./) end it "should convert paragraph tags" do @@ -141,95 +135,96 @@ end it "should convert underline tags" do - expect(@textile).to include("text with +underlining+ is here") + expect(html).to include_markup("text with +underlining+ is here") end it "should not explicitly markup paragraphs unnecessarily" do - expect(@textile).to_not include("p. test paragraph without id or class attributes") + expect(html).not_to include_markup("p. test paragraph without id or class attributes") end it "should treat divs as block level elements, but ignore any attributes (effectively converting them to paragraphs)" do - expect(@textile).to include("\n\nA note\n\nFollowed by another note\n\n") + expect(html).to include_markup("\n\nA note\n\nFollowed by another note\n\n") end it "should not convert pointless spans to textile (i.e. without supported attributes)" do - expect(@textile).to_not include("%a useless span%") + expect(html).not_to include_markup("%a useless span%") end - it "should convert class and id attributes" do - # We don't convert classes. expect(@textile).to include("h1(story.title entry-title#post-312).") - end + # it "should convert class and id attributes" do + # # We don't convert classes. + # expect(html).to include_markup("h1(story.title entry-title#post-312).") + # end it "should convert tables" do - expect(@textile).to include("\n\n||heading 1 ||heading 2 || \n|value 1 |value 2 | \n") + expect(html).to include_markup("\n\n||heading 1 ||heading 2 || \n|value 1 |value 2 | \n") end it "should convert tables with text immediately preceding the opening table tag" do - expect(@textile).to include("Some text before a table\n\n||heading 1 ||heading 2 || \n|value 1 |value 2 | \n") + expect(html).to include_markup("Some text before a table\n\n||heading 1 ||heading 2 || \n|value 1 |value 2 | \n") end it "should respect line breaks within block level elements" do - expect(@textile).to include("\n# test 1 \n# test 2\nwith a line break in the middle") + expect(html).to include_markup("\n# test 1 \n# test 2\nwith a line break in the middle") end it "should handle paragraphs nested within blockquote" do - expect(@textile).to include("{quote}\n\nparagraph inside a blockquote\n\nanother paragraph inside a blockquote\n\n{quote}") + expect(html).to include_markup("{quote}\n\nparagraph inside a blockquote\n\nanother paragraph inside a blockquote\n\n{quote}") end it "should retain leading and trailing whitespace within inline elements" do - expect(@textile).to include("test *invalid* list item 1") + expect(html).to include_markup("test *invalid* list item 1") end it "should respect trailing line break tags within other elements" do - expect(@textile).to include("*Please apply online at:*\n[www.something.co.uk/careers|http://www.something.co.uk/careers]") + expect(html).to include_markup("*Please apply online at:*\n[www.something.co.uk/careers|http://www.something.co.uk/careers]") end it "should handle nested inline elements" do - expect(@textile).to include(" *_test emphasised bold text_* test") + expect(html).to include_markup(" *_test emphasised bold text_* test") end it "should remove empty quicktags before returning" do - expect(@textile).to_not include("*more bold text* *\n*") + expect(html).not_to include_markup("*more bold text* *\n*") end it "should remove unsupported elements (e.g. script)" do - expect(@textile).to_not include('script') + expect(html).not_to include_markup('script') end it "should remove unsupported attributes (i.e. everything but class and id)" do - expect(@textile).to_not include('summary') - expect(@textile).to_not include('a table with a caption') - expect(@textile).to_not include('style') - expect(@textile).to_not include('color:red;') + expect(html).not_to include_markup('summary') + expect(html).not_to include_markup('a table with a caption') + expect(html).not_to include_markup('style') + expect(html).not_to include_markup('color:red;') end it "should clean up multiple blank lines created by tolerant parsing before returning" do - expect(@textile).to_not match(/(\n\n\s*){2,}/) + expect(html).not_to match_markup(/(\n\n\s*){2,}/) end it "should keep entity references" do - expect(@textile).to include("©") + expect(html).to include_markup("©") end it "should output unknown named entity references" do - expect(@textile).to include("&unknownref;") + expect(html).to include_markup("&unknownref;") end it "should convert numerical entity references to a utf-8 character" do - expect(@textile).to include("…") + expect(html).to include_markup("…") end it "should ignore entities that are already converted" do - expect(@textile).to include("Æïœü") + expect(html).to include_markup("Æïœü") end it "should ignore ampersands that are not part of an entity reference" do - expect(@textile).to include("Hughes & Hughes") + expect(html).to include_markup("Hughes & Hughes") end it "should retain whitespace surrounding entity references" do - expect(@textile).to include("… € 100") - expect(@textile).to include("Something & something") + expect(html).to include_markup("… € 100") + expect(html).to include_markup("Something & something") end it "should escape special characters" do @@ -237,19 +232,22 @@ # characters that would otherwise be mistaken for markup. It should not # escape every instance of these characters. pending 'only escape correct characters' - expect(@textile).to include("\\# Not a list") - expect(@textile).to include("\\* Not a list") - expect(@textile).to include("\\- Not a list") - expect(@textile).to include("\\*Not bold\\*") - expect(@textile).to include("\\_Not a emph\\_") - expect(@textile).to include("\\{Not curly\\}") - expect(@textile).to include("\\|Not table") + expect(html).to include_markup("\\# Not a list") + expect(html).to include_markup("\\* Not a list") + expect(html).to include_markup("\\- Not a list") + expect(html).to include_markup("\\*Not bold\\*") + expect(html).to include_markup("\\_Not a emph\\_") + expect(html).to include_markup("\\{Not curly\\}") + expect(html).to include_markup("\\|Not table") end it "should support strikethrough" do - expect(@textile).to include("-strike 1-") - expect(@textile).to include("-strike 2-") + expect(html).to include_markup("-strike 1-") + expect(html).to include_markup("-strike 2-") + end + + it "should transform code" do + expect(html).to include_markup("{code}some_good_code{code}") end - end diff --git a/spec/jira_examples_spec.rb b/spec/jira_examples_spec.rb index 3310541..43eddc2 100644 --- a/spec/jira_examples_spec.rb +++ b/spec/jira_examples_spec.rb @@ -1,226 +1,219 @@ -# encoding: utf-8 -$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') -require 'html2confluence' +require_relative 'spec_helper' describe HTMLToConfluenceParser, "when running JIRA examples" do - before :all do - html = <<-END -

    Biggest heading

    -

    Bigger heading

    -

    Big heading

    -

    Normal heading

    -
    Small heading
    -
    Smallest heading
    - -

    strong
    -emphasis
    -citation
    -deleted
    -inserted
    -superscript
    -subscript
    -monospaced

    -
    Some block quoted text
    - -
    -

    here is quotable
    - content to be quoted

    - -


    - look ma, red text!

    - -

    a
    b

    - -

    a
    -b

    - -
    - -

    a – b
    -a — b

    - -

    anchor

    - -

    http://jira.atlassian.com
    -Atlassian

    - -

    file:///c:/temp/foo.txt

    - -

    - -
      -
    • some
    • -
    • bullet -
        -
      • indented
      • -
      • bullets
      • -
      -
    • -
    • points
    • -
    - - -
      -
    • different
    • -
    • bullet
    • -
    • types
    • -
    - - -
      -
    1. a
    2. -
    3. numbered
    4. -
    5. list
    6. -
    - - -
      -
    1. a
    2. -
    3. numbered -
        -
      • with
      • -
      • nested
      • -
      • bullet
      • -
      -
    4. -
    5. list
    6. -
    - - -
      -
    • a
    • -
    • bulleted -
        -
      1. with
      2. -
      3. nested
      4. -
      5. numbered
      6. -
      -
    • -
    • list
    • -
    - - - - - - - - - - - - - - - - - - -
    heading 1heading 2heading 3
    col A1col A2col A3
    col B1col B2col B3
    - - - - - - -
    -
    preformatted piece of text
    - so *no* further _formatting_ is done here
    -
    -
    - END - - markup = <<-END - h1. Biggest heading -h2. Bigger heading -h3. Big heading -h4. Normal heading -h5. Small heading -h6. Smallest heading - -*strong* -_emphasis_ -??citation?? --deleted- -+inserted+ -^superscript^ -~subscript~ -{{monospaced}} -bq. Some block quoted text - -{quote} - here is quotable - content to be quoted -{quote} - -{color:red} - look ma, red text! -{color} - -a\\b - -a -b - ----- - -a -- b -a --- b - -[#anchor] - -[http://jira.atlassian.com] -[Atlassian|http://atlassian.com] - -[file:///c:/temp/foo.txt] - -{anchor:anchorname} - -* some -* bullet -** indented -** bullets -* points - -- different -- bullet -- types - -# a -# numbered -# list - -# a -# numbered -#* with -#* nested -#* bullet -# list - -* a -* bulleted -*# with -*# nested -*# numbered -* list - -||heading 1||heading 2||heading 3|| -|col A1|col A2|col A3| -|col B1|col B2|col B3| - -{noformat} -preformatted piece of text - so *no* further _formatting_ is done here -{noformat} - END - - - parser = HTMLToConfluenceParser.new - parser.feed(html) - @textile = parser.to_wiki_markup - #puts @textile - #puts RedCloth.new(@textile).to_html + let(:html) do + <<~HTML +

    Biggest heading

    +

    Bigger heading

    +

    Big heading

    +

    Normal heading

    +
    Small heading
    +
    Smallest heading
    + +

    strong
    + emphasis
    + citation
    + deleted
    + inserted
    + superscript
    + subscript
    + monospaced

    +
    Some block quoted text
    + +
    +

    here is quotable
    + content to be quoted

    + +


    + look ma, red text!

    + +

    a
    b

    + +

    a
    + b

    + +
    + +

    a – b
    + a — b

    + +

    anchor

    + +

    http://jira.atlassian.com
    + Atlassian

    + +

    file:///c:/temp/foo.txt

    + +

    + +
      +
    • some
    • +
    • bullet +
        +
      • indented
      • +
      • bullets
      • +
      +
    • +
    • points
    • +
    + + +
      +
    • different
    • +
    • bullet
    • +
    • types
    • +
    + + +
      +
    1. a
    2. +
    3. numbered
    4. +
    5. list
    6. +
    + + +
      +
    1. a
    2. +
    3. numbered +
        +
      • with
      • +
      • nested
      • +
      • bullet
      • +
      +
    4. +
    5. list
    6. +
    + + +
      +
    • a
    • +
    • bulleted +
        +
      1. with
      2. +
      3. nested
      4. +
      5. numbered
      6. +
      +
    • +
    • list
    • +
    + + + + + + + + + + + + + + + + + + +
    heading 1heading 2heading 3
    col A1col A2col A3
    col B1col B2col B3
    + + + + + + +
    +
    preformatted piece of text
    +       so *no* further _formatting_ is done here
    +      
    +
    + HTML + end + + let(:markup) do + <<~MARKUP + h1. Biggest heading + h2. Bigger heading + h3. Big heading + h4. Normal heading + h5. Small heading + h6. Smallest heading + + *strong* + _emphasis_ + ??citation?? + -deleted- + +inserted+ + ^superscript^ + ~subscript~ + {{monospaced}} + bq. Some block quoted text + + {quote} + here is quotable + content to be quoted + {quote} + + {color:red} + look ma, red text! + {color} + + a\\b + + a + b + + ---- + + a -- b + a --- b + + [#anchor] + + [http://jira.atlassian.com] + [Atlassian|http://atlassian.com] + + [file:///c:/temp/foo.txt] + + {anchor:anchorname} + + * some + * bullet + ** indented + ** bullets + * points + + - different + - bullet + - types + + # a + # numbered + # list + + # a + # numbered + #* with + #* nested + #* bullet + # list + + * a + * bulleted + *# with + *# nested + *# numbered + * list + + ||heading 1||heading 2||heading 3|| + |col A1|col A2|col A3| + |col B1|col B2|col B3| + + {noformat} + preformatted piece of text + so *no* further _formatting_ is done here + {noformat} + MARKUP end it "should convert images within a link" do @@ -244,73 +237,73 @@ end it "should convert heading tags" do - expect(@textile).to match(/^h1. Biggest heading/) - expect(@textile).to match(/^h2. Bigger heading/) - expect(@textile).to match(/^h3. Big heading/) - expect(@textile).to match(/^h4. Normal heading/) - expect(@textile).to match(/^h5. Small heading/) - expect(@textile).to match(/^h6. Smallest heading/) + expect(html).to match_markup(/^h1. Biggest heading/) + expect(html).to match_markup(/^h2. Bigger heading/) + expect(html).to match_markup(/^h3. Big heading/) + expect(html).to match_markup(/^h4. Normal heading/) + expect(html).to match_markup(/^h5. Small heading/) + expect(html).to match_markup(/^h6. Smallest heading/) end it "should convert inline formatting" do - expect(@textile).to match(/^\*strong\*/) - expect(@textile).to match(/^_emphasis_/) - expect(@textile).to match(/^\?\?citation\?\?/) - expect(@textile).to match(/^-deleted-/) - expect(@textile).to match(/^\+inserted\+/) - expect(@textile).to match(/^\^superscript\^/) - expect(@textile).to match(/^\~subscript\~/) - expect(@textile).to match(/^\{\{monospaced\}\}/) + expect(html).to match_markup(/\*strong\*/) + expect(html).to match_markup(/_emphasis_/) + expect(html).to match_markup(/\?\?citation\?\?/) + expect(html).to match_markup(/-deleted-/) + expect(html).to match_markup(/\+inserted\+/) + expect(html).to match_markup(/\^superscript\^/) + expect(html).to match_markup(/\~subscript\~/) + expect(html).to match_markup(/\{\{monospaced\}\}/) end it "should convert block quotes" do - expect(@textile).to match(/^bq. Some block quoted text/) - expect(@textile).to match(/^\{quote\}\s*here is quotable\s*content to be quoted\s*{quote}/) + expect(html).to match_markup(/^bq. Some block quoted text/) + expect(html).to match_markup(/^\{quote\}\s*here is quotable\s*content to be quoted\s*{quote}/) end it "should handle text color" do - expect(@textile).to match(/^\{color\:red\}\s*look ma, red text!\s*\{color\}/) + expect(html).to match_markup(/^\{color\:red\}\s*look ma, red text!\s*\{color\}/) end it "should convert horizontal rules" do - expect(@textile).to match(/^----/) + expect(html).to match_markup(/^----/) end it "should convert dashes" do - expect(@textile).to match(/^a -- b/) - expect(@textile).to match(/^a --- b/) + expect(html).to match_markup(/^a -- b/) + expect(html).to match_markup(/^a --- b/) end it "should convert links" do - expect(@textile).to match(/^\[\#anchor\]/) - expect(@textile).to match(/^\[http\:\/\/jira.atlassian.com\]/) - expect(@textile).to match(/^\[Atlassian\|http\:\/\/atlassian.com\]/) - expect(@textile).to match(/^\[file\:\/\/\/c\:\/temp\/foo.txt\]/) + expect(html).to match_markup(/^\[\#anchor\]/) + expect(html).to match_markup(/^\[http\:\/\/jira.atlassian.com\]/) + expect(html).to match_markup(/^\[Atlassian\|http\:\/\/atlassian.com\]/) + expect(html).to match_markup(/^\[file\:\/\/\/c\:\/temp\/foo.txt\]/) end it "should convert bullets" do - expect(@textile).to match(/\* some\s*\* bullet\s*\*\* indented\s*\*\* bullets\s*\* points/) - expect(@textile).to match(/- different\s*- bullet\s*- types/) - expect(@textile).to match(/# a\s*# numbered\s*# list/) - expect(@textile).to match(/# a\s*# numbered\s*#\* with\s*#\* nested\s*#\* bullet\s*# list/) - expect(@textile).to match(/\* a\s*\* bulleted\s*\*# with\s*\*# nested\s*\*# numbered\s*\* list/) + expect(html).to match_markup(/\* some\s*\* bullet\s*\*\* indented\s*\*\* bullets\s*\* points/) + expect(html).to match_markup(/- different\s*- bullet\s*- types/) + expect(html).to match_markup(/# a\s*# numbered\s*# list/) + expect(html).to match_markup(/# a\s*# numbered\s*#\* with\s*#\* nested\s*#\* bullet\s*# list/) + expect(html).to match_markup(/\* a\s*\* bulleted\s*\*# with\s*\*# nested\s*\*# numbered\s*\* list/) end it "should convert pre blocks" do - expect(@textile).to match(/^\{noformat\}\s*preformatted piece of text\s*so \*no\* further _formatting_ is done here\s*\{noformat\}/) + expect(html).to match_markup(/^\{noformat\}\s*preformatted piece of text\s*so \*no\* further _formatting_ is done here\s*\{noformat\}/) end it "should convert tables" do - expect(@textile).to include("||heading 1 ||heading 2 ||heading 3 ||") - expect(@textile).to include("|col A1 |col A2 |col A3 |") - expect(@textile).to include("|col B1 |col B2 |col B3 |") + expect(html).to include_markup("||heading 1 ||heading 2 ||heading 3 ||") + expect(html).to include_markup("|col A1 |col A2 |col A3 |") + expect(html).to include_markup("|col B1 |col B2 |col B3 |") end it "should convert emoji from jira" do - expect(@textile).to include(":)") - expect(@textile).to include("(!)") - expect(@textile).to include("(off)") - expect(@textile).to include("(/)") + expect(html).to include_markup(":)") + expect(html).to include_markup("(!)") + expect(html).to include_markup("(off)") + expect(html).to include_markup("(/)") end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..763a30c --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,89 @@ +$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') +require 'html2confluence' + +require 'rspec' + +RSpec.configure do |config| + config.color = true + config.tty = true + config.formatter = :documentation +end + +require 'rspec/expectations' + +module MarkupHelpers + module_function + + def html_to_markup(html) + # parser = HTMLToConfluenceParser.new + # parser.feed(html) + # parser.to_wiki_markup + Walker.new(html).convert + end + + def indent(markup) + if markup.kind_of? String + markup.lines.map{|l| " #{l}"}.join + else + " #{markup.inspect}" + end + end + +end + +RSpec::Matchers.define :match_markup do |markup| + include MarkupHelpers + + diffable + + match do |html| + @html = html + @actual = html_to_markup(html).strip + @markup = markup + case markup + when String + values_match? @markup.strip, @actual + when Regexp + !!markup.match(@actual) + end + end + + failure_message do |html| + <<~ERR + expected that the parsed HTML: + #{indent @html} + + would produce markup matching: + #{indent @markup} + + instead, the parser produced: + #{indent @actual} + ERR + end +end + +RSpec::Matchers.define :include_markup do |markup| + include MarkupHelpers + + diffable + + match do |html| + @html = html + @actual = html_to_markup(html).strip + @markup = markup.strip + @actual.include? @markup + end + + failure_message do |html| + <<~ERR + expected that the parsed HTML: + #{indent @html} + + would include markup: + #{indent @markup} + + instead, the parser produced: + #{indent @actual} + ERR + end +end diff --git a/spec/tiny_examples_spec.rb b/spec/tiny_examples_spec.rb new file mode 100644 index 0000000..076926a --- /dev/null +++ b/spec/tiny_examples_spec.rb @@ -0,0 +1,399 @@ +require_relative 'spec_helper' + +describe HTMLToConfluenceParser do + + context "handling nested formats with spans and  " do + + it "should handle word-embedded nested formatting" do + html = <<~HTML + onetwothreefourfive + HTML + markup = <<~MARKUP + one{_}two{*}three{*}four{_}five + MARKUP + expect(html).to match_markup(markup) + + html = <<~HTML + one two three four five + HTML + markup = <<~MARKUP + one{_} two{*} three {*}four {_}five + MARKUP + expect(html).to match_markup(markup) + + html = <<~HTML + one two three four five + HTML + markup = <<~MARKUP + one _two *three* four_ five + MARKUP + expect(html).to match_markup(markup) + + html = <<~HTML + one two three four five + HTML + markup = <<~MARKUP + one _ two * three * four _ five + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle arbitrary spans" do + html = <<~HTML + testtesttest + HTML + + markup = <<~MARKUP + testtesttest + MARKUP + + expect(html).to match_markup(markup) + end + + it "should handle arbitrary %nbsp;" do + html = <<~HTML + test test test + HTML + + markup = <<~MARKUP + test test test + MARKUP + + expect(html).to match_markup(markup) + end + + it "should handle trailing %nbsp; inside formats", pending: true do + html = <<~HTML + test test + HTML + markup = <<~MARKUP + *test* test + MARKUP + expect(html).to match_markup(markup) + + html = <<~HTML + test  test + HTML + markup = <<~MARKUP + *test* test + MARKUP + expect(html).to match_markup(markup) + + html = <<~HTML + test  test + HTML + markup = <<~MARKUP + test *test* + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle format-wrapping spans" do + html = <<~HTML + test + HTML + + markup = <<~MARKUP + *test* + MARKUP + + expect(html).to match_markup(markup) + end + + it "should handle all these things at once" do + html = <<~HTML + test test test + HTML + + markup = <<~MARKUP + test _test *test*_ + MARKUP + + expect(html).to match_markup(markup) + end + + end + + it "should preserve empty li's" do + html = <<~HTML +
      +
    • +
    • test
    • +
    • test
    • +
    + HTML + markup = "* \n* test\n* test" + expect(html).to match_markup(markup) + + html = <<~HTML +
      +
    • test
    • +
    • +
    • test
    • +
    + HTML + markup = "* test\n* \n* test" + expect(html).to match_markup(markup) + + html = <<~HTML +
      +
    • test
    • +
    • test
    • +
    • +
    + HTML + markup = "* test\n* test\n*" + expect(html).to match_markup(markup) + end + + context "formatting within tables" do + + it "should normalize spaces around table items to only contain one trailing space" do + html = <<~HTML + + + + + + + +
    Header
    text
    + HTML + + markup = <<~HTML + ||Header || + |text | + HTML + + expect(html).to match_markup(markup) + end + + it "should handle formatting" do + html = <<~HTML + + + + + + + +
    Header
    bold
    + HTML + + markup = <<~HTML + ||Header || + |*bold* | + HTML + + expect(html).to match_markup(markup) + end + + it "should handle lists" do + html = <<~HTML + + + + + + + +
    Header
    +
      +
    • test
    • +
    • test
    • +
    +
    + HTML + + markup = <<~HTML + ||Header || + |* test + * test | + HTML + + expect(html).to match_markup(markup) + end + + it "should handle empty cells" do + html = <<~HTML + + + + + + + +
    Header
    + HTML + + markup = <<~HTML + ||Header || + | | + HTML + + expect(html).to match_markup(markup) + + html = <<~HTML + + + + + + + + + +
    HeaderHeader
    text
    + HTML + + markup = <<~HTML + ||Header ||Header || + |text | | + HTML + + expect(html).to match_markup(markup) + + html = <<~HTML + + + + + + + + + +
    HeaderHeader
    text
    + HTML + + markup = <<~HTML + ||Header ||Header || + |text | | + HTML + + expect(html).to match_markup(markup) + + html = <<~HTML + + + + + + + + + +
    HeaderHeader
    text
    + HTML + + markup = <<~HTML + ||Header ||Header || + | |text | + HTML + + expect(html).to match_markup(markup) + + html = <<~HTML + + + + + + + + + + + +
    HeaderHeaderHeader
    texttext
    + HTML + + markup = <<~HTML + ||Header ||Header ||Header || + |text | |text | + HTML + + expect(html).to match_markup(markup) + end + + end + + it "should convert emoji images to literals" do + html = <<~HTML + + + + + HTML + markup = <<~MARKUP + :)(!)(off)(/) + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle headers" do + html = <<~HTML +

    Biggest heading

    + HTML + markup = <<~MARKUP + h1. Biggest heading + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle empty links" do + html = <<~HTML + foobar + HTML + markup = <<~MARKUP + foobar + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle empty links inside headers" do + html = <<~HTML +

    Biggest heading

    + HTML + markup = <<~MARKUP + h1. Biggest heading + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle font colors with whitespace and brs" do + html = <<~HTML +


    + look ma, red text!

    + HTML + markup = <<~MARKUP + {color:red} + + look ma, red text!{color} + MARKUP + expect(html).to match_markup(markup) + end + + it "should handle html entities" do + html = <<~HTML +

    a
    + b

    + +
    + +

    a – b
    + a — b

    + HTML + markup = <<~MARKUP + a + b + + --- + + a - b + a – b + MARKUP + expect(html).to match_markup(markup) + end + +end