From 3943b0e03ca118e001b157e894a8e915a07eb544 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Fri, 22 Mar 2019 12:14:40 +0100 Subject: [PATCH 1/8] added test for email with regards --- email_reply_parser.gemspec | 1 + test/email_reply_parser_test.rb | 6 ++++++ test/emails/email_with_kind_regards.txt | 9 +++++++++ 3 files changed, 16 insertions(+) create mode 100644 test/emails/email_with_kind_regards.txt diff --git a/email_reply_parser.gemspec b/email_reply_parser.gemspec index 2154fae..e94f5a4 100644 --- a/email_reply_parser.gemspec +++ b/email_reply_parser.gemspec @@ -63,6 +63,7 @@ Gem::Specification.new do |s| test/emails/email_sig_delimiter_in_middle_of_line.txt test/emails/greedy_on.txt test/emails/pathological.txt + test/emails/email_with_kind_regards.txt ] # = MANIFEST = diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index df509cb..d495bad 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -222,6 +222,12 @@ def test_doesnt_remove_signature_delimiter_in_mid_line assert_equal 1, reply.fragments.size end + def test_kind_regards_signature + reply = email('email_with_kind_regards') + assert_match(/Thats a great idea/, reply.fragments[0].to_s) + assert_equal [false, true], reply.fragments.map { |f| f.signature? } + end + def email(name) body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s EmailReplyParser.read body diff --git a/test/emails/email_with_kind_regards.txt b/test/emails/email_with_kind_regards.txt new file mode 100644 index 0000000..da91e71 --- /dev/null +++ b/test/emails/email_with_kind_regards.txt @@ -0,0 +1,9 @@ +Hey, + +Thats a great idea! 
+ + +Med venlig hilsen / Kind regards + +Tim Tommy +CEO From 6f555731c47a38494d371fb7e1bb0c7b1dcc6de5 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Fri, 22 Mar 2019 12:36:13 +0100 Subject: [PATCH 2/8] added regards to to regex --- lib/email_reply_parser.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index e81cbcb..9e0b58d 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -131,8 +131,7 @@ def read(text) end private - EMPTY = "".freeze - SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)' + SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)|(neslih gilnev deM$)|(sdrager dniK$)|(sdrager mraW$)|(sdrager tseB$)' begin require 're2' From 59a09c98a9685b199f2f10f0d9e38b05c061b4f6 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Fri, 22 Mar 2019 12:44:48 +0100 Subject: [PATCH 3/8] accedentally removed EMPTY --- lib/email_reply_parser.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 9e0b58d..43d0f55 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -131,6 +131,7 @@ def read(text) end private + EMPTY = "".freeze SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)|(neslih gilnev deM$)|(sdrager dniK$)|(sdrager mraW$)|(sdrager tseB$)' begin From de92b4ba92af824b7f23313d8effabf8be7571df Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Wed, 3 Apr 2019 10:39:35 +0200 Subject: [PATCH 4/8] create a regards fragment --- lib/email_reply_parser.rb | 21 +++++++++++++++++---- test/email_reply_parser_test.rb | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 43d0f55..9afa00c 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -132,13 +132,16 @@ def read(text) private EMPTY = "".freeze - SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf 
tneS$)|(neslih gilnev deM$)|(sdrager dniK$)|(sdrager mraW$)|(sdrager tseB$)' + REGARDS = '(neslih gilnev deM$)|(sdrager dniK$)|(sdrager mraW$)|(sdrager tseB$)' + SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)' begin require 're2' SIG_REGEX = RE2::Regexp.new(SIGNATURE) + REGARDS_REGEX = RE2::Regexp.new(REGARDS) rescue LoadError SIG_REGEX = Regexp.new(SIGNATURE) + REGARDS_REGEX = Regexp.new(REGARDS) end ### Line-by-Line Parsing @@ -166,6 +169,15 @@ def scan_line(line) end end + # Mark the current Fragment as a regards if the current line is empty + # and the Fragment starts with a common regards indicator. + if @fragment && line == EMPTY + if REGARDS_REGEX.match @fragment.lines.last + @fragment.regards = true + finish_fragment + end + end + # If the line matches the current fragment, add it. Note that a common # reply header also counts as part of the quoted Fragment, even though # it doesn't start with `>`. @@ -217,7 +229,7 @@ def finish_fragment if @fragment @fragment.finish if !@found_visible - if @fragment.quoted? || @fragment.signature? || + if @fragment.quoted? || @fragment.signature? || @fragment.regards? || @fragment.to_s.strip == EMPTY @fragment.hidden = true else @@ -235,7 +247,7 @@ def finish_fragment # Represents a group of paragraphs in the email sharing common attributes. # Paragraphs should get their own fragment if they are a quoted area or a # signature. - class Fragment < Struct.new(:quoted, :signature, :hidden) + class Fragment < Struct.new(:quoted, :signature, :hidden, :regards) # This is an Array of String lines of content. Since the content is # reversed, this array is backwards, and contains reversed strings. 
attr_reader :lines, @@ -245,7 +257,7 @@ class Fragment < Struct.new(:quoted, :signature, :hidden) :content def initialize(quoted, first_line) - self.signature = self.hidden = false + self.signature = self.hidden = self.regards = false self.quoted = quoted @lines = [first_line] @content = nil @@ -255,6 +267,7 @@ def initialize(quoted, first_line) alias quoted? quoted alias signature? signature alias hidden? hidden + alias regards? regards # Builds the string content by joining the lines and reversing them. # diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index d495bad..bb1ef14 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -225,7 +225,7 @@ def test_doesnt_remove_signature_delimiter_in_mid_line def test_kind_regards_signature reply = email('email_with_kind_regards') assert_match(/Thats a great idea/, reply.fragments[0].to_s) - assert_equal [false, true], reply.fragments.map { |f| f.signature? } + assert_equal [false, true], reply.fragments.map { |f| f.regards? 
} end def email(name) From 3d7e4b691641c85893e67d2f13863b3d64d3cde1 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Wed, 3 Apr 2019 10:50:16 +0200 Subject: [PATCH 5/8] create regards regex from array and ignore case --- lib/email_reply_parser.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 9afa00c..3a653ce 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -132,16 +132,19 @@ def read(text) private EMPTY = "".freeze - REGARDS = '(neslih gilnev deM$)|(sdrager dniK$)|(sdrager mraW$)|(sdrager tseB$)' + REGARDS = ['med venlig hilsen', 'kind regards', 'warm regards', 'best regards'].map do |regard| + "(#{regard.reverse}$)" + end.join('|') + SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)' begin require 're2' SIG_REGEX = RE2::Regexp.new(SIGNATURE) - REGARDS_REGEX = RE2::Regexp.new(REGARDS) + REGARDS_REGEX = RE2::Regexp.new(REGARDS, case_sensitive: false) rescue LoadError SIG_REGEX = Regexp.new(SIGNATURE) - REGARDS_REGEX = Regexp.new(REGARDS) + REGARDS_REGEX = Regexp.new(REGARDS, ignore_case: true) end ### Line-by-Line Parsing From cdb2aafb942244e5912ff8a08811d962adcf6df8 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Wed, 3 Apr 2019 11:41:39 +0200 Subject: [PATCH 6/8] added configuration class --- lib/email_reply_parser.rb | 34 ++++++++++++++++++++++++- test/email_reply_parser_test.rb | 15 +++++++++++ test/emails/email_with_kind_regards.txt | 2 +- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 3a653ce..790a942 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -32,6 +32,26 @@ class EmailReplyParser VERSION = "0.5.9" + class << self + attr_writer :configuration + + # Public: Configuration + # + # Returns a Configuration instance. 
+ # + def configuration + @configuration ||= Configuration.new + end + + # Public: Configures EmailReplyParser + # + # block - a default configuration instance is exposed in the block + # + def configure + yield(configuration) + end + end + # Public: Splits an email body into a list of Fragments. # # text - A String email body. @@ -50,6 +70,18 @@ def self.parse_reply(text) self.read(text).visible_text end + ### Configuration + + # A Configuration instance. + class Configuration + # Configuration has an Array of regards + attr_accessor :regards + + def initialize + @regards = [] + end + end + ### Emails # An Email instance represents a parsed body String. @@ -132,7 +164,7 @@ def read(text) private EMPTY = "".freeze - REGARDS = ['med venlig hilsen', 'kind regards', 'warm regards', 'best regards'].map do |regard| + REGARDS = EmailReplyParser.configuration.regards.map do |regard| "(#{regard.reverse}$)" end.join('|') diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index bb1ef14..dc1b1af 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -9,6 +9,18 @@ EMAIL_FIXTURE_PATH = dir + 'emails' class EmailReplyParserTest < Test::Unit::TestCase + def test_default_configuration + assert_instance_of EmailReplyParser::Configuration, EmailReplyParser.configuration + end + + def test_configure_regards + EmailReplyParser.configure do |config| + config.regards = ['best regards'] + end + + assert_equal ['best regards'], EmailReplyParser.configuration.regards + end + def test_encoding_should_be_maintained body = IO.read EMAIL_FIXTURE_PATH.join("email_1_1.txt").to_s EmailReplyParser.read body @@ -223,6 +235,9 @@ def test_doesnt_remove_signature_delimiter_in_mid_line end def test_kind_regards_signature + # EmailReplyParser.configure do |config| + # config.regards = ['Kind regards'] + # end reply = email('email_with_kind_regards') assert_match(/Thats a great idea/, reply.fragments[0].to_s) assert_equal [false, true], 
reply.fragments.map { |f| f.regards? } diff --git a/test/emails/email_with_kind_regards.txt b/test/emails/email_with_kind_regards.txt index da91e71..870761c 100644 --- a/test/emails/email_with_kind_regards.txt +++ b/test/emails/email_with_kind_regards.txt @@ -3,7 +3,7 @@ Hey, Thats a great idea! -Med venlig hilsen / Kind regards +Kind regards Tim Tommy CEO From b9aeb3f4ac4947182b7b42b94861799af02f6619 Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Wed, 3 Apr 2019 11:58:04 +0200 Subject: [PATCH 7/8] create regards regex at runtime --- lib/email_reply_parser.rb | 33 +++++++++++++++++++++++---------- test/email_reply_parser_test.rb | 10 +++------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 790a942..d3cd259 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -164,19 +164,31 @@ def read(text) private EMPTY = "".freeze - REGARDS = EmailReplyParser.configuration.regards.map do |regard| - "(#{regard.reverse}$)" - end.join('|') - SIGNATURE = '(?m)(--\s*$|__\s*$|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)' begin require 're2' SIG_REGEX = RE2::Regexp.new(SIGNATURE) - REGARDS_REGEX = RE2::Regexp.new(REGARDS, case_sensitive: false) rescue LoadError SIG_REGEX = Regexp.new(SIGNATURE) - REGARDS_REGEX = Regexp.new(REGARDS, ignore_case: true) + end + + # Regular expression for regards + # + # Returns a Regexp instance if regards are configured, otherwise it returns + # nil + def regards_regex + return nil if EmailReplyParser.configuration.regards.empty? 
+ value = EmailReplyParser.configuration.regards.map do |regard| + "(#{regard.reverse}$)" + end.join('|') + + begin + require 're2' + RE2::Regexp.new(value, case_sensitive: false) + rescue LoadError + Regexp.new(value, ignore_case: true) + end end ### Line-by-Line Parsing @@ -204,10 +216,11 @@ def scan_line(line) end end - # Mark the current Fragment as a regards if the current line is empty - # and the Fragment starts with a common regards indicator. - if @fragment && line == EMPTY - if REGARDS_REGEX.match @fragment.lines.last + # Mark the current Fragment as a regards if regards are configured and + # the current line is empty and the Fragment starts with a common regards + # indicator. + if regards_regex && @fragment && line == EMPTY + if regards_regex.match @fragment.lines.last @fragment.regards = true finish_fragment end diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index dc1b1af..48f411f 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -9,10 +9,6 @@ EMAIL_FIXTURE_PATH = dir + 'emails' class EmailReplyParserTest < Test::Unit::TestCase - def test_default_configuration - assert_instance_of EmailReplyParser::Configuration, EmailReplyParser.configuration - end - def test_configure_regards EmailReplyParser.configure do |config| config.regards = ['best regards'] @@ -235,9 +231,9 @@ def test_doesnt_remove_signature_delimiter_in_mid_line end def test_kind_regards_signature - # EmailReplyParser.configure do |config| - # config.regards = ['Kind regards'] - # end + EmailReplyParser.configure do |config| + config.regards = ['Kind regards'] + end reply = email('email_with_kind_regards') assert_match(/Thats a great idea/, reply.fragments[0].to_s) assert_equal [false, true], reply.fragments.map { |f| f.regards? 
} From 8c8c036085d73b34b4b3744bb781232d244c516a Mon Sep 17 00:00:00 2001 From: Aske Hansen Date: Wed, 3 Apr 2019 11:59:05 +0200 Subject: [PATCH 8/8] rename test --- test/email_reply_parser_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index 48f411f..de1f38a 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -9,7 +9,7 @@ EMAIL_FIXTURE_PATH = dir + 'emails' class EmailReplyParserTest < Test::Unit::TestCase - def test_configure_regards + def test_regards_configuration EmailReplyParser.configure do |config| config.regards = ['best regards'] end