
Commit 4e8ca37

Use concurrency :shared and sync codecs to optimize performance
As part of this refactor the `message_format` option had to be (finally) made obsolete; it had been deprecated for quite some time. This provides a nice boost.

Before:

```
time bin/logstash -e "input { generator { count => 3000000} } filter { } output { file { path => '/tmp/newfileout'} }"
Settings: Default pipeline workers: 8
Pipeline main started
Pipeline main has been shutdown
stopping pipeline {:id=>"main"}
      139.95 real       223.61 user        28.93 sys
```

After:

```
rm /tmp/newfileout; time bin/logstash -e "input { generator { count => 3000000} } filter { } output { file { codec => json_lines path => '/tmp/newfileout'} }" ; ls -lh /tmp/newfileout
Settings: Default pipeline workers: 8
Pipeline main started
Pipeline main has been shutdown
stopping pipeline {:id=>"main"}
       56.12 real       192.99 user        17.38 sys
```

Fixes #46
1 parent 2cf8878 commit 4e8ca37
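For context, the plugin-API shape this commit moves to looks roughly like the skeleton below. This is an illustrative sketch only (the class and names are made up, not code from this repository): with `concurrency :shared` a single plugin instance is invoked by every pipeline worker, and an output that implements `multi_receive_encoded` receives events already rendered by its configured codec, so it only has to write the encoded payloads and guard its own shared state.

```ruby
# Illustrative skeleton of a shared-concurrency, batch-encoded output.
# Not the file output itself -- just the shape of the contract it now uses.
require "logstash/outputs/base"

class LogStash::Outputs::Sketch < LogStash::Outputs::Base
  config_name "sketch"
  concurrency :shared            # one instance, called from all pipeline workers

  def register
    @mutex = Mutex.new           # the plugin must protect its own shared state
  end

  # Receives [event, encoded_payload] pairs; the framework has already run the
  # configured codec, so there is no per-event on_event callback to wire up.
  def multi_receive_encoded(events_and_encoded)
    @mutex.synchronize do
      events_and_encoded.each { |_event, encoded| $stdout.write(encoded) }
    end
  end
end
```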

File tree

4 files changed: +71 -70 lines changed


CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+## 4.0.0
+- Make 'message_format' option obsolete
+- Use new Logstash 2.4/5.0 APIs for working batchwise and with shared concurrency
+
## 3.0.2
- Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99
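Since `message_format` is now obsolete rather than merely deprecated, configurations that still use it have to switch to a `line` codec carrying the same `format` string. A hedged example of the replacement, mirroring the updated spec further down in this commit (the path and message here are placeholders):

```ruby
# What an old `message_format => "Custom format: %{message}"` setting becomes:
# a `line` codec with a `format` option. The output path is just a placeholder.
require "logstash/outputs/file"
require "logstash/codecs/line"
require "logstash/event"

output = LogStash::Outputs::File.new(
  "path"  => "/tmp/output.txt",
  "codec" => LogStash::Codecs::Line.new("format" => "Custom format: %{message}")
)
output.register
output.multi_receive([LogStash::Event.new("message" => "hello world")])
output.close
```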

lib/logstash/outputs/file.rb

Lines changed: 47 additions & 30 deletions
@@ -17,6 +17,8 @@
# }
# }
class LogStash::Outputs::File < LogStash::Outputs::Base
+  concurrency :shared
+
  FIELD_REF = /%\{[^}]+\}/

  config_name "file"
@@ -35,13 +37,7 @@ class LogStash::Outputs::File < LogStash::Outputs::Base
  # E.g: `/%{myfield}/`, `/test-%{myfield}/` are not valid paths
  config :path, :validate => :string, :required => true

-  # The format to use when writing events to the file. This value
-  # supports any string and can include `%{name}` and other dynamic
-  # strings.
-  #
-  # If this setting is omitted, the full json representation of the
-  # event will be written as a single line.
-  config :message_format, :validate => :string, :deprecated => "You can achieve the same behavior with the 'line' codec"
+  config :message_format, :validate => :string, :obsolete => "You can achieve the same behavior with the 'line' codec"

  # Flush interval (in seconds) for flushing writes to log files.
  # 0 will flush on every message.
@@ -76,10 +72,9 @@ class LogStash::Outputs::File < LogStash::Outputs::Base
  def register
    require "fileutils" # For mkdir_p

-    workers_not_supported
-
    @files = {}
-
+    @io_mutex = Mutex.new
+
    @path = File.expand_path(path)

    validate_path
@@ -91,6 +86,7 @@ def register
    end
    @failure_path = File.join(@file_root, @filename_failure)

+
    now = Time.now
    @last_flush_cycle = now
    @last_stale_cleanup_cycle = now
@@ -101,8 +97,6 @@ def register
      @codec = LogStash::Plugin.lookup("codec", "line").new
      @codec.format = @message_format
    end
-
-    @codec.on_event(&method(:write_event))
  end # def register

  private
@@ -125,20 +119,37 @@ def root_directory
  end

  public
-  def receive(event)
-    @codec.encode(event)
-    close_stale_files
+  def multi_receive_encoded(events_and_encoded)
+    encoded_by_path = Hash.new {|h,k| h[k] = []}
+
+    events_and_encoded.each do |event,encoded|
+      file_output_path = event_path(event)
+      encoded_by_path[file_output_path] << encoded
+    end
+
+    @io_mutex.synchronize do
+      encoded_by_path.each do |path,chunks|
+        fd = open(path)
+        chunks.each {|chunk| fd.write(chunk) }
+        fd.flush
+      end
+
+      close_stale_files
+    end
  end # def receive

  public
  def close
-    @logger.debug("Close: closing files")
-    @files.each do |path, fd|
-      begin
-        fd.close
-        @logger.debug("Closed file #{path}", :fd => fd)
-      rescue Exception => e
-        @logger.error("Exception while flushing and closing files.", :exception => e)
+    @io_mutex.synchronize do
+      @logger.debug("Close: closing files")
+
+      @files.each do |path, fd|
+        begin
+          fd.close
+          @logger.debug("Closed file #{path}", :fd => fd)
+        rescue Exception => e
+          @logger.error("Exception while flushing and closing files.", :exception => e)
+        end
      end
    end
  end
@@ -150,7 +161,7 @@ def inside_file_root?(log_path)
  end

  private
-  def write_event(event, data)
+  def event_path(event)
    file_output_path = generate_filepath(event)
    if path_with_field_ref? && !inside_file_root?(file_output_path)
      @logger.warn("File: the event tried to write outside the files root, writing the event to the failure file", :event => event, :filename => @failure_path)
@@ -159,10 +170,8 @@ def write_event(event, data)
      file_output_path = @failure_path
    end
    @logger.debug("File, writing event to file.", :filename => file_output_path)
-    fd = open(file_output_path)
-    # TODO(sissel): Check if we should rotate the file.
-    fd.write(data)
-    flush(fd)
+
+    file_output_path
  end

  private
@@ -195,10 +204,12 @@ def flush(fd)
  def flush_pending_files
    return unless Time.now - @last_flush_cycle >= flush_interval
    @logger.debug("Starting flush cycle")
+
    @files.each do |path, fd|
      @logger.debug("Flushing file", :path => path, :fd => fd)
      fd.flush
    end
+
    @last_flush_cycle = Time.now
  end

@@ -207,6 +218,7 @@ def flush_pending_files
  def close_stale_files
    now = Time.now
    return unless now - @last_stale_cleanup_cycle >= @stale_cleanup_interval
+
    @logger.info("Starting stale files cleanup cycle", :files => @files)
    inactive_files = @files.select { |path, fd| not fd.active }
    @logger.debug("%d stale files found" % inactive_files.count, :inactive_files => inactive_files)
@@ -222,7 +234,7 @@ def close_stale_files

  private
  def cached?(path)
-    @files.include?(path) && !@files[path].nil?
+    @files.include?(path) && !@files[path].nil?
  end

  private
@@ -234,16 +246,19 @@ def deleted?(path)
  def open(path)
    if !deleted?(path) && cached?(path)
      return @files[path]
-    elsif deleted?(path)
+    end
+
+    if deleted?(path)
      if @create_if_deleted
        @logger.debug("Required path was deleted, creating the file again", :path => path)
        @files.delete(path)
      else
        return @files[path] if cached?(path)
      end
    end
-    @logger.info("Opening file", :path => path)

+    @logger.info("Opening file", :path => path)
+
    dir = File.dirname(path)
    if !Dir.exist?(dir)
      @logger.info("Creating directory", :directory => dir)
@@ -253,6 +268,7 @@ def open(path)
        FileUtils.mkdir_p(dir)
      end
    end
+
    # work around a bug opening fifos (bug JRUBY-6280)
    stat = File.stat(path) rescue nil
    if stat && stat.ftype == "fifo" && LogStash::Environment.jruby?
@@ -288,6 +304,7 @@ def flush
  end
  def method_missing(method_name, *args, &block)
    if @io.respond_to?(method_name)
+
      @io.send(method_name, *args, &block)
    else
      super
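One small idiom in the new `multi_receive_encoded` above is worth spelling out: `Hash.new {|h,k| h[k] = []}` creates the per-path array on first access, so a whole batch can be grouped by destination file before the mutex is taken, and each file is then opened, written, and flushed once per batch rather than once per event. A standalone illustration with made-up data:

```ruby
# Standalone illustration of the group-by-path idiom used above;
# the paths and payloads here are made up.
encoded_by_path = Hash.new { |h, k| h[k] = [] }

[["/tmp/a.log", "one\n"], ["/tmp/b.log", "two\n"], ["/tmp/a.log", "three\n"]].each do |path, chunk|
  encoded_by_path[path] << chunk     # first access creates the array
end

encoded_by_path.each do |path, chunks|
  # In the plugin this is where the file descriptor is opened and the chunks
  # are written under @io_mutex; here we just print the grouped payloads.
  puts "#{path}: #{chunks.join.inspect}"
end
```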

logstash-output-file.gemspec

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

  s.name = 'logstash-output-file'
-  s.version = '3.0.2'
+  s.version = '4.0.0'
  s.licenses = ['Apache License (2.0)']
  s.summary = "This output will write events to files on disk"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -20,7 +20,7 @@ Gem::Specification.new do |s|
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
-  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
+  s.add_runtime_dependency "logstash-core-plugin-api", ">= 2.0.0", "< 2.99"
  s.add_runtime_dependency 'logstash-codec-json_lines'
  s.add_runtime_dependency 'logstash-codec-line'

spec/outputs/file_spec.rb

Lines changed: 18 additions & 38 deletions
@@ -47,7 +47,7 @@

  describe "ship lots of events to a file gzipped" do
    Stud::Temporary.file('logstash-spec-output-file') do |tmp_file|
-      event_count = 10000 + rand(500)
+      event_count = 100000 + rand(500)

      config <<-CONFIG
        input {
@@ -125,13 +125,14 @@

      10.times do |i|
        event = LogStash::Event.new("event_id" => i)
-        output.receive(event)
+        output.multi_receive([event])
      end
      FileUtils.rm(temp_file)
      10.times do |i|
        event = LogStash::Event.new("event_id" => i+10)
-        output.receive(event)
+        output.multi_receive([event])
      end
+
      expect(FileTest.size(temp_file.path)).to be > 0
    end

@@ -147,12 +148,12 @@

      10.times do |i|
        event = LogStash::Event.new("event_id" => i)
-        output.receive(event)
+        output.multi_receive([event])
      end
      FileUtils.rm(temp_file)
      10.times do |i|
        event = LogStash::Event.new("event_id" => i+10)
-        output.receive(event)
+        output.multi_receive([event])
      end
      expect(FileTest.exist?(temp_file.path)).to be_falsey
      expect(FileTest.size(output.failure_path)).to be > 0
@@ -184,7 +185,7 @@

        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(bad_event)
+        output.multi_receive([bad_event])

        error_file = File.join(path, config["filename_failure"])

@@ -202,10 +203,10 @@
        output.register

        bad_event.set('error', encoded_once)
-        output.receive(bad_event)
+        output.multi_receive([bad_event])

        bad_event.set('error', encoded_twice)
-        output.receive(bad_event)
+        output.multi_receive([bad_event])

        expect(Dir.glob(File.join(path, "*")).size).to eq(2)
        output.close
@@ -218,7 +219,7 @@
        output.register

        bad_event.set('error', '../..//test')
-        output.receive(bad_event)
+        output.multi_receive([bad_event])

        expect(Dir.glob(File.join(path, "*")).size).to eq(1)
        output.close
@@ -235,7 +236,7 @@
        config = { "path" => "#{path}/%{error}" }
        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])

        good_file = File.join(path, good_event.get('error'))
        expect(File.exist?(good_file)).to eq(true)
@@ -254,7 +255,7 @@
        config = { "path" => dynamic_path }
        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])

        expect(File.exist?(expected_path)).to eq(true)
        output.close
@@ -276,7 +277,7 @@

        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])

        expect(File.exist?(expected_path)).to eq(true)
        output.close
@@ -291,7 +292,7 @@
        config = { "path" => "#{path}/%{error}" }
        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])

        good_file = File.join(path, good_event.get('error'))
        expect(File.exist?(good_file)).to eq(true)
@@ -310,7 +311,7 @@
        config = { "path" => "#{path}/output.txt" }
        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])
        good_file = File.join(path, 'output.txt')
        expect(File.exist?(good_file)).to eq(true)
        output.close #teardown first to allow reading the file
@@ -328,30 +329,9 @@

      Stud::Temporary.directory do |path|
        config = { "path" => "#{path}/output.txt" }
-        output = LogStash::Outputs::File.new(config)
-        output.codec = LogStash::Codecs::Line.new({ "format" => "Custom format: %{message}"})
-        output.register
-        output.receive(good_event)
-        good_file = File.join(path, 'output.txt')
-        expect(File.exist?(good_file)).to eq(true)
-        output.close #teardown first to allow reading the file
-        File.open(good_file) {|f|
-          line = f.readline
-          expect(line).to eq("Custom format: hello world\n")
-        }
-      end
-    end
-  end
-  context "when using deprecated message_format config" do
-    it 'falls back to line codec' do
-      good_event = LogStash::Event.new
-      good_event.set('message', 'hello world')
-
-      Stud::Temporary.directory do |path|
-        config = { "path" => "#{path}/output.txt", "message_format" => "Custom format: %{message}" }
-        output = LogStash::Outputs::File.new(config)
+        output = LogStash::Outputs::File.new(config.merge("codec" => LogStash::Codecs::Line.new({ "format" => "Custom format: %{message}"})))
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])
        good_file = File.join(path, 'output.txt')
        expect(File.exist?(good_file)).to eq(true)
        output.close #teardown first to allow reading the file
@@ -375,7 +355,7 @@
        }
        output = LogStash::Outputs::File.new(config)
        output.register
-        output.receive(good_event)
+        output.multi_receive([good_event])
        good_file = File.join(path, 'is/nested/output.txt')
        expect(File.exist?(good_file)).to eq(true)
        expect(File.stat(good_file).mode.to_s(8)[-3..-1]).to eq('610')
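With `receive` gone, the specs above drive the plugin through `multi_receive`; for outputs that define `multi_receive_encoded`, the base class roughly turns that into "encode the batch with the configured codec, then hand over the encoded chunks". Condensed, the pattern the updated examples follow is approximately this (simplified, without the RSpec matchers):

```ruby
# Condensed form of the spec pattern above (no RSpec assertions; simplified).
require "stud/temporary"
require "logstash/outputs/file"
require "logstash/event"

Stud::Temporary.directory do |path|
  output = LogStash::Outputs::File.new("path" => "#{path}/output.txt")
  output.register

  # multi_receive takes a batch; the framework encodes it with the configured
  # codec before the plugin writes the encoded chunks to disk.
  output.multi_receive([LogStash::Event.new("event_id" => 1)])

  output.close # close first so the flushed file can be read back
  puts File.read("#{path}/output.txt")
end
```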

0 commit comments
