diff --git a/base/io.jl b/base/io.jl index ec855c2a5e17b..1906a7aa40d44 100644 --- a/base/io.jl +++ b/base/io.jl @@ -167,24 +167,37 @@ The text is assumed to be encoded in UTF-8. readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename) """ - readline(stream::IO=STDIN) - readline(filename::AbstractString) + readline(stream::IO=STDIN, chomp::Bool=false) + readline(filename::AbstractString, chomp::Bool=false) + +Read a single line of text including from the given I/O stream or file (defaults to `STDIN`). +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. When reading from a file, the text is +assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included +in the output (if reached before the end of the input); otherwise newline characters(s) +are stripped from result. +""" +function readline(filename::AbstractString, chomp = false) + open(filename) do f + readline(f, chomp) + end +end -Read a single line of text, including a trailing newline character (if one is reached before -the end of the input), from the given I/O stream or file (defaults to `STDIN`). -When reading from a file, the text is assumed to be encoded in UTF-8. -""" -readline(filename::AbstractString) = open(readline, filename) """ - readlines(stream::IO) - readlines(filename::AbstractString) + readlines(stream::IO, chomp::Bool=false) + readlines(filename::AbstractString, chomp::Bool=false) Read all lines of an I/O stream or a file as a vector of strings. -The text is assumed to be encoded in UTF-8. -""" -readlines(filename::AbstractString) = open(readlines, filename) - +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. +The text is assumed to be encoded in UTF-8. If `chomp=false` +trailing newline character(s) will be included in the output; +otherwise newline characters(s) are stripped from result. +""" +function readlines(filename::AbstractString, chomp::Bool=false) + open(filename) do f + readlines(f, chomp) + end +end ## byte-order mark, ntoh & hton ## @@ -448,7 +461,29 @@ function readuntil(s::IO, t::AbstractString) end readline() = readline(STDIN) -readline(s::IO) = readuntil(s, '\n') + +function readline(s::IO, chomp::Bool=false) + out = UInt8[] + while !eof(s) + c = read(s, UInt8) + if c == 0x0d + !chomp && push!(out, c) + if !eof(s) && Base.peek(s) == 0x0a + c = read(s, UInt8) + !chomp && push!(out, c) + end + + break + elseif c == 0x0a + !chomp && push!(out, c) + break + else + push!(out, c) + end + end + + return String(out) +end """ readchomp(x) @@ -512,22 +547,28 @@ readstring(filename::AbstractString) = open(readstring, filename) type EachLine stream::IO + chomp::Bool ondone::Function - EachLine(stream) = EachLine(stream, ()->nothing) - EachLine(stream, ondone) = new(stream, ondone) + EachLine(stream, chomp) = EachLine(stream, chomp, ()->nothing) + EachLine(stream, chomp, ondone) = new(stream, chomp, ondone) end """ - eachline(stream::IO) - eachline(filename::AbstractString) + eachline(stream::IO, chomp::Bool=false) + eachline(filename::AbstractString, chomp::Bool=false) Create an iterable object that will yield each line from an I/O stream or a file. -The text is assumed to be encoded in UTF-8. +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. +The text is assumed to be encoded in UTF-8. If `chomp=false` +trailing newline character(s) will be included in the output; +otherwise newline characters(s) are stripped from result. """ -eachline(stream::IO) = EachLine(stream) -function eachline(filename::AbstractString) +eachline(stream::IO, chomp::Bool=false) = EachLine(stream, chomp) + + +function eachline(filename::AbstractString, chomp::Bool=false) s = open(filename) - EachLine(s, ()->close(s)) + EachLine(s, chomp, ()->close(s)) end start(itr::EachLine) = nothing @@ -538,10 +579,11 @@ function done(itr::EachLine, nada) itr.ondone() true end -next(itr::EachLine, nada) = (readline(itr.stream), nothing) + +next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp), nothing) eltype(::Type{EachLine}) = String -readlines(s=STDIN) = collect(eachline(s)) +readlines(s::IO=STDIN, chomp::Bool=false) = collect(eachline(s, chomp)) iteratorsize(::Type{EachLine}) = SizeUnknown() diff --git a/base/iostream.jl b/base/iostream.jl index 5899db85c5d29..962427a341ebb 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -225,6 +225,10 @@ function readuntil(s::IOStream, delim::UInt8) ccall(:jl_readuntil, Array{UInt8,1}, (Ptr{Void}, UInt8), s.ios, delim) end +function readline(s::IOStream, chomp::Bool=false) + String(ccall(:jl_readline, Array{UInt8,1}, (Ptr{Void}, Cint), s.ios, chomp)) +end + function readbytes_all!(s::IOStream, b::Array{UInt8}, nb) olb = lb = length(b) nr = 0 diff --git a/base/strings/util.jl b/base/strings/util.jl index cf6b630191a24..5833b79b57b65 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -77,18 +77,23 @@ chop(s::AbstractString) = SubString(s, 1, endof(s)-1) chomp(s::AbstractString) Remove a single trailing newline from a string. +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. """ function chomp(s::AbstractString) i = endof(s) - if (i < 1 || s[i] != '\n') return SubString(s, 1, i) end + if (i < 1 || (s[i] != '\n' && s[i] != '\r')) return SubString(s, 1, i) end + if (s[i] == '\r') return SubString(s, 1, i-1) end j = prevind(s,i) - if (j < 1 || s[j] != '\r') return SubString(s, 1, i-1) end + if (j < 1 || (s[j] != '\r' && s[i] == '\n')) return SubString(s, 1, i-1) end return SubString(s, 1, j-1) end + function chomp(s::String) i = endof(s) - if i < 1 || s.data[i] != 0x0a + if i < 1 || (s.data[i] != 0x0a && s.data[i] != 0x0d) SubString(s, 1, i) + elseif s.data[i] == 0x0d + SubString(s, 1, i-1) elseif i < 2 || s.data[i-1] != 0x0d SubString(s, 1, i-1) else @@ -101,6 +106,8 @@ function chomp!(s::String) if !isempty(s) && s.data[end] == 0x0a n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2 ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n) + elseif s.data[end] == 0x0d + ccall(:jl_array_del_end, Void, (Any, UInt), s.data, 1) end return s end diff --git a/src/support/ios.c b/src/support/ios.c index 650a8690c0888..dc6048a950c53 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -831,6 +831,66 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) return total; } +// Copy until '\r', '\n' or '\r\n' +size_t ios_copyline(ios_t *to, ios_t *from, int chomp) +{ + size_t nchomp = 0; + size_t total = 0, avail = (size_t)(from->size - from->bpos); + size_t ntowrite; + while (!ios_eof(from)) { + if (avail == 0) { + avail = ios_readprep(from, LINE_CHUNK_SIZE); + if (avail == 0) + break; + } + size_t written; + + char *r = NULL; + char *n = NULL; + + for (size_t i = 0; i < avail; i++){ + char *p = (char*)from->buf+from->bpos+i; + char ch = from->buf[from->bpos+i]; + + if (ch == '\n'){ + n = p; + if (chomp) nchomp = 1; + ntowrite = n - (from->buf+from->bpos) + 1 - nchomp; + break; + } + if (ch == '\r'){ + r = p; + if (chomp) nchomp = 1; + ntowrite = r - (from->buf+from->bpos) + 1 - nchomp; + if (i < avail){ + char ch2 = from->buf[from->bpos+i+1]; + if (ch2 == '\n'){ + if (chomp) nchomp = 2; + ntowrite = r - (from->buf+from->bpos) + 2 - nchomp; + } + } + break; + } + } + + if (r == NULL && n == NULL) { + written = ios_write(to, from->buf+from->bpos, avail); + from->bpos += avail; + total += written; + avail = 0; + } + else { + written = ios_write(to, from->buf+from->bpos, ntowrite); + from->bpos += ntowrite + nchomp; + total += written; + return total; + } + } + from->_eof = 1; + return total; +} + + static void _ios_init(ios_t *s) { // put all fields in a sane initial state diff --git a/src/support/ios.h b/src/support/ios.h index 6acd52fb01838..f39e48ce81de3 100644 --- a/src/support/ios.h +++ b/src/support/ios.h @@ -95,6 +95,7 @@ JL_DLLEXPORT void ios_set_readonly(ios_t *s); JL_DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); JL_DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim); +JL_DLLEXPORT size_t ios_copyline(ios_t *to, ios_t *from, int chomp); // ensure at least n bytes are buffered if possible. returns # available. JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); diff --git a/src/sys.c b/src/sys.c index 7d685181e628b..902ee82583716 100644 --- a/src/sys.c +++ b/src/sys.c @@ -281,6 +281,27 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim) return (jl_value_t*)a; } +JL_DLLEXPORT jl_value_t *jl_readline(ios_t *s, int chomp) +{ + jl_array_t *a; + a = jl_alloc_array_1d(jl_array_uint8_type, 80); + ios_t dest; + ios_mem(&dest, 0); + ios_setbuf(&dest, (char*)a->data, 80, 0); + size_t n = ios_copyline(&dest, s, chomp); + if (dest.buf != a->data) { + a = jl_take_buffer(&dest); + } + else { +#ifdef STORE_ARRAY_LEN + a->length = n; +#endif + a->nrows = n; + ((char*)a->data)[n] = '\0'; + } + return (jl_value_t*)a; +} + JL_DLLEXPORT uint64_t jl_ios_get_nbyte_int(ios_t *s, const size_t n) { assert(n <= 8); diff --git a/test/iobuffer.jl b/test/iobuffer.jl index 53b7eb4df806b..4e14feb12768d 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -74,6 +74,20 @@ Base.compact(io) @test_throws ArgumentError seek(io,0) @test_throws ArgumentError truncate(io,0) @test readline(io) == "whipped cream\n" +@test write(io,"pancakes\r\nwaffles\n\rblueberries\r") > 0 +@test readline(io) == "pancakes\r\n" +@test readline(io) == "waffles\n" +@test readline(io) == "\r" +@test readline(io) == "blueberries\r" +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readline(io, true) == "pancakes" +@test readline(io, true) == "waffles" +@test readline(io, true) == "" +@test readline(io, true) == "blueberries" +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readlines(io) == String["pancakes\r\n","waffles\n","\r","blueberries\r"] +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readlines(io, true) == String["pancakes","waffles","","blueberries"] Base.compact(io) @test position(io) == 0 @test ioslength(io) == 0 diff --git a/test/read.jl b/test/read.jl index d753c8bdd33b2..ab4779ce9b492 100644 --- a/test/read.jl +++ b/test/read.jl @@ -7,6 +7,7 @@ tasks = [] # Create test file filename = joinpath(dir, "file.txt") text = "C1,C2\n1,2\na,b\n" +text2 = "line1\rline2\nline\r\nline4" # List of IO producers l = Vector{Tuple{AbstractString,Function}}() @@ -44,6 +45,10 @@ s = io(text) close(s) push!(l, ("IOBuffer", io)) +# Readlines +write(filename, text2) +readlines(filename) == String["line1\r","line2\n","line\r\n","line4"] +readlines(filename, true) == String["line1","line2","line","line4"] function run_test_server(srv, text) push!(tasks, @async begin @@ -243,12 +248,19 @@ for (name, f) in l verbose && println("$name readline...") @test readline(io()) == readline(IOBuffer(text)) @test readline(io()) == readline(filename) + @test readline(io(), true) == readline(IOBuffer(text), true) + @test readline(io(), true) == readline(filename, true) verbose && println("$name readlines...") @test readlines(io()) == readlines(IOBuffer(text)) @test readlines(io()) == readlines(filename) + @test readlines(io(), true) == readlines(IOBuffer(text), true) + @test readlines(io(), true) == readlines(filename, true) + @test collect(eachline(io())) == collect(eachline(IOBuffer(text))) @test collect(eachline(io())) == collect(eachline(filename)) + @test collect(eachline(io(), true)) == collect(eachline(IOBuffer(text), true)) + @test collect(eachline(io(), true)) == collect(eachline(filename, true)) cleanup() diff --git a/test/strings/util.jl b/test/strings/util.jl index 21072a524bf4c..66d850a640616 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -213,6 +213,12 @@ end @test chop("fooε") == "foo" @test isa(chomp("foo"), SubString) @test isa(chop("foo"), SubString) +@test chomp("foo\r\n") == "foo" +@test chomp("foo\r") == "foo" +@test chomp("foo\r\r") == "foo\r" +@test Base.chomp!("foo\r\n") == "foo" +@test Base.chomp!("foo\r") == "foo" +@test Base.chomp!("foo\r\r") == "foo\r" # bytes2hex and hex2bytes hex_str = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"