From f83e967c7c8ff9574bb85be6fad783d54c98c997 Mon Sep 17 00:00:00 2001
From: Eli Young
Date: Mon, 14 Mar 2016 19:46:07 -0700
Subject: [PATCH 1/2] Match the entire string when testing

This allows for validating that a pattern will fail to match an entire
string, even if it would match partially.

The anchors are written as \\A and \\z because the grok expression is
built inside a double-quoted Ruby string: a bare \A or \z is not a
recognised string escape there, so the backslash would be dropped and
grok would be handed the literal characters "A" and "z" instead of
start-of-string / end-of-string anchors.
---
 spec/spec_helper.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index f968d590..069e1dd0 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -19,7 +19,7 @@ def grok_match(label, message)
 end
 
 def build_grok(label)
-  grok = LogStash::Filters::Grok.new("match" => ["message", "%{#{label}}"])
+  grok = LogStash::Filters::Grok.new("match" => ["message", "\\A%{#{label}}\\z"])
   grok.register
   grok
 end

From f132dab9f55b916a3f10ebdcec791ff6166c00d0 Mon Sep 17 00:00:00 2001
From: Eli Young
Date: Wed, 28 Oct 2015 15:21:26 -0700
Subject: [PATCH 2/2] Add ampersand to URIPATH

RFC1738 specifies that URL paths can legally contain ampersands:

    ; HTTP

    httpurl        = "http://" hostport [ "/" hpath [ "?" search ]]
    hpath          = hsegment *[ "/" hsegment ]
    hsegment       = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
    search         = *[ uchar | ";" | ":" | "@" | "&" | "=" ]

Accordingly, HAProxy (and potentially other applications) will not
escape them in log files.

The accompanying spec matches against the whole input string, so its
"fancy characters" example contains only characters that are members of
the URIPATH character class (including the newly added ampersand).
---
 patterns/grok-patterns     |  2 +-
 spec/patterns/core_spec.rb | 56 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/patterns/grok-patterns b/patterns/grok-patterns
index 09dc224e..ed2ed487 100644
--- a/patterns/grok-patterns
+++ b/patterns/grok-patterns
@@ -40,7 +40,7 @@ URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
 URIHOST %{IPORHOST}(?::%{POSINT:port})?
 # uripath comes loosely from RFC1738, but mostly from what Firefox
 # doesn't turn into %XX
-URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+
+URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
 #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
 URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
 URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
diff --git a/spec/patterns/core_spec.rb b/spec/patterns/core_spec.rb
index 9f1f0ca3..7cdd17d6 100644
--- a/spec/patterns/core_spec.rb
+++ b/spec/patterns/core_spec.rb
@@ -113,6 +113,62 @@
   end
 end
 
+describe "URIPATH" do
+  let(:pattern) { 'URIPATH' }
+
+  context "when matching valid URIs" do
+    context "and the URI is simple" do
+      let(:value) { '/foo' }
+
+      it "should match the path" do
+        expect(grok_match(pattern,value)).to pass
+      end
+    end
+
+    context "and the URI has a trailing slash" do
+      let(:value) { '/foo/' }
+
+      it "should match the path" do
+        expect(grok_match(pattern,value)).to pass
+      end
+    end
+
+    context "and the URI has multiple levels" do
+      let(:value) { '/foo/bar' }
+
+      it "should match the path" do
+        expect(grok_match(pattern,value)).to pass
+      end
+    end
+
+    context "and the URI has fancy characters" do
+      let(:value) { '/aA1$.+!*\'(){},~:;=@#%&_-' }
+
+      it "should match the path" do
+        expect(grok_match(pattern,value)).to pass
+      end
+    end
+  end
+
+  context "when matching invalid URIs" do
+    context "and the URI has no leading slash" do
+      let(:value) { 'foo' }
+
+      it "should not match the path" do
+        expect(grok_match(pattern,value)).not_to pass
+      end
+    end
+
+    context "and the URI has invalid characters" do
+      let(:value) { '/`' }
+
+      it "should not match the path" do
+        expect(grok_match(pattern,value)).not_to pass
+      end
+    end
+  end
+end
+
 describe "IPV4" do
 
   let(:pattern) { 'IPV4' }