From 81d7c3813b135503dab5ae56376c4005ae1cdc45 Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Thu, 5 Jan 2017 18:42:49 +0200 Subject: [PATCH 01/10] Make readline etc. more general --- base/io.jl | 59 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/base/io.jl b/base/io.jl index ec855c2a5e17b..06e3ab25b033c 100644 --- a/base/io.jl +++ b/base/io.jl @@ -174,7 +174,12 @@ Read a single line of text, including a trailing newline character (if one is re the end of the input), from the given I/O stream or file (defaults to `STDIN`). When reading from a file, the text is assumed to be encoded in UTF-8. """ -readline(filename::AbstractString) = open(readline, filename) +function readline(filename::AbstractString, chomp = false; nl2lf = false) + open(filename) do f + readline(f, chomp, nl2lf = nl2lf) + end +end + """ readlines(stream::IO) @@ -183,8 +188,11 @@ readline(filename::AbstractString) = open(readline, filename) Read all lines of an I/O stream or a file as a vector of strings. The text is assumed to be encoded in UTF-8. """ -readlines(filename::AbstractString) = open(readlines, filename) - +function readlines(filename::AbstractString, chomp = false; nl2lf = false) + open(filename) do f + readlines(f, chomp, nl2lf = nl2lf) + end +end ## byte-order mark, ntoh & hton ## @@ -448,7 +456,32 @@ function readuntil(s::IO, t::AbstractString) end readline() = readline(STDIN) -readline(s::IO) = readuntil(s, '\n') + +function readline(s::IO, chomp = false; nl2lf=false) + nl2lf && (chomp = false) + + linefeeds = ['\n', '\r', '\u85', '\u0B', '\u0c', '\u2028', '\u2029'] + out = IOBuffer() + while !eof(s) + c = read(s, Char) + if c in linefeeds + if c == '\r' && !eof(s) && Base.peek(s) == 0x0a + !(nl2lf || chomp) && write(out, c) + c = read(s, Char) + !chomp && write(out, c) + else + nl2lf && (c = '\n') + !chomp && write(out, c) + end + + break + else + write(out, c) + end + end + + return String(take!(out)) +end """ readchomp(x) @@ -513,8 +546,10 @@ readstring(filename::AbstractString) = open(readstring, filename) type EachLine stream::IO ondone::Function - EachLine(stream) = EachLine(stream, ()->nothing) - EachLine(stream, ondone) = new(stream, ondone) + chomp::Bool + nl2lf::Bool + EachLine(stream, chomp, nl2lf) = EachLine(stream, ()->nothing, chomp, nl2lf) + EachLine(stream, ondone, chomp, nl2lf) = new(stream, ondone, chomp, nl2lf) end """ @@ -524,10 +559,12 @@ end Create an iterable object that will yield each line from an I/O stream or a file. The text is assumed to be encoded in UTF-8. """ -eachline(stream::IO) = EachLine(stream) -function eachline(filename::AbstractString) +eachline(stream::IO, chomp = false; nl2lf=false) = EachLine(stream, chomp, nl2lf) + + +function eachline(filename::AbstractString, chomp = false; nl2lf=false) s = open(filename) - EachLine(s, ()->close(s)) + EachLine(s, chomp, nl2lf, ()->close(s)) end start(itr::EachLine) = nothing @@ -538,10 +575,10 @@ function done(itr::EachLine, nada) itr.ondone() true end -next(itr::EachLine, nada) = (readline(itr.stream), nothing) +next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp, nl2lf = itr.nl2lf), nothing) eltype(::Type{EachLine}) = String -readlines(s=STDIN) = collect(eachline(s)) +readlines(s=STDIN, chomp = false; nl2lf = false) = collect(eachline(s, chomp, nl2lf = nl2lf)) iteratorsize(::Type{EachLine}) = SizeUnknown() From a04d81bed89d1bb4ea93c0845158c93379ab95b0 Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Fri, 6 Jan 2017 01:19:50 +0200 Subject: [PATCH 02/10] Make readlines faster and less general --- base/io.jl | 54 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/base/io.jl b/base/io.jl index 06e3ab25b033c..d91be1a4c6d1e 100644 --- a/base/io.jl +++ b/base/io.jl @@ -174,9 +174,9 @@ Read a single line of text, including a trailing newline character (if one is re the end of the input), from the given I/O stream or file (defaults to `STDIN`). When reading from a file, the text is assumed to be encoded in UTF-8. """ -function readline(filename::AbstractString, chomp = false; nl2lf = false) +function readline(filename::AbstractString, chomp = false) open(filename) do f - readline(f, chomp, nl2lf = nl2lf) + readline(f, chomp) end end @@ -188,9 +188,9 @@ end Read all lines of an I/O stream or a file as a vector of strings. The text is assumed to be encoded in UTF-8. """ -function readlines(filename::AbstractString, chomp = false; nl2lf = false) +function readlines(filename::AbstractString, chomp = false) open(filename) do f - readlines(f, chomp, nl2lf = nl2lf) + readlines(f, chomp) end end @@ -457,20 +457,38 @@ end readline() = readline(STDIN) -function readline(s::IO, chomp = false; nl2lf=false) - nl2lf && (chomp = false) +function readline(s::IO, chomp::Bool = false) + out = UInt8[] + while !eof(s) + c = read(s, UInt8) + if c == 0x0d && !eof(s) && Base.peek(s) == 0x0a + !chomp && push!(out, c) + c = read(s, UInt8) + !chomp && push!(out, c) + break + elseif c == 0x0a + !chomp && push!(out, c) + break + else + push!(out, c) + end + end - linefeeds = ['\n', '\r', '\u85', '\u0B', '\u0c', '\u2028', '\u2029'] + return String(out) +end + +const linefeeds = ['\n', '\r', '\u85', '\u0B', '\u0c', '\u2028', '\u2029'] + +function readline(s::IO, chomp = false, newlines::Vector{Char}=linefeeds) out = IOBuffer() while !eof(s) c = read(s, Char) - if c in linefeeds + if c in newlines if c == '\r' && !eof(s) && Base.peek(s) == 0x0a - !(nl2lf || chomp) && write(out, c) + !chomp && write(out, c) c = read(s, Char) !chomp && write(out, c) else - nl2lf && (c = '\n') !chomp && write(out, c) end @@ -547,9 +565,8 @@ type EachLine stream::IO ondone::Function chomp::Bool - nl2lf::Bool - EachLine(stream, chomp, nl2lf) = EachLine(stream, ()->nothing, chomp, nl2lf) - EachLine(stream, ondone, chomp, nl2lf) = new(stream, ondone, chomp, nl2lf) + EachLine(stream, chomp) = EachLine(stream, ()->nothing, chomp) + EachLine(stream, ondone, chomp) = new(stream, ondone, chomp) end """ @@ -559,12 +576,12 @@ end Create an iterable object that will yield each line from an I/O stream or a file. The text is assumed to be encoded in UTF-8. """ -eachline(stream::IO, chomp = false; nl2lf=false) = EachLine(stream, chomp, nl2lf) +eachline(stream::IO, chomp::Bool = false) = EachLine(stream, chomp) -function eachline(filename::AbstractString, chomp = false; nl2lf=false) +function eachline(filename::AbstractString, chomp::Bool = false) s = open(filename) - EachLine(s, chomp, nl2lf, ()->close(s)) + EachLine(s, ()->close(s), chomp) end start(itr::EachLine) = nothing @@ -575,10 +592,11 @@ function done(itr::EachLine, nada) itr.ondone() true end -next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp, nl2lf = itr.nl2lf), nothing) + +next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp), nothing) eltype(::Type{EachLine}) = String -readlines(s=STDIN, chomp = false; nl2lf = false) = collect(eachline(s, chomp, nl2lf = nl2lf)) +readlines(s=STDIN, chomp = false) = collect(eachline(s, chomp)) iteratorsize(::Type{EachLine}) = SizeUnknown() From fb30c92ac7d609ec226da276caeca0f574dc463a Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sat, 7 Jan 2017 17:34:54 +0200 Subject: [PATCH 03/10] Implement readline in C --- base/io.jl | 35 ++++++--------------------- base/iostream.jl | 4 ++++ src/support/ios.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ src/support/ios.h | 1 + src/sys.c | 21 +++++++++++++++++ 5 files changed, 93 insertions(+), 28 deletions(-) diff --git a/base/io.jl b/base/io.jl index d91be1a4c6d1e..163e8a80c5304 100644 --- a/base/io.jl +++ b/base/io.jl @@ -457,14 +457,17 @@ end readline() = readline(STDIN) -function readline(s::IO, chomp::Bool = false) +function readline(s::IO, chomp::Bool=false) out = UInt8[] while !eof(s) c = read(s, UInt8) - if c == 0x0d && !eof(s) && Base.peek(s) == 0x0a - !chomp && push!(out, c) - c = read(s, UInt8) + if c == 0x0d !chomp && push!(out, c) + if !eof(s) && Base.peek(s) == 0x0a + c = read(s, UInt8) + !chomp && push!(out, c) + end + break elseif c == 0x0a !chomp && push!(out, c) @@ -477,30 +480,6 @@ function readline(s::IO, chomp::Bool = false) return String(out) end -const linefeeds = ['\n', '\r', '\u85', '\u0B', '\u0c', '\u2028', '\u2029'] - -function readline(s::IO, chomp = false, newlines::Vector{Char}=linefeeds) - out = IOBuffer() - while !eof(s) - c = read(s, Char) - if c in newlines - if c == '\r' && !eof(s) && Base.peek(s) == 0x0a - !chomp && write(out, c) - c = read(s, Char) - !chomp && write(out, c) - else - !chomp && write(out, c) - end - - break - else - write(out, c) - end - end - - return String(take!(out)) -end - """ readchomp(x) diff --git a/base/iostream.jl b/base/iostream.jl index 5899db85c5d29..962427a341ebb 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -225,6 +225,10 @@ function readuntil(s::IOStream, delim::UInt8) ccall(:jl_readuntil, Array{UInt8,1}, (Ptr{Void}, UInt8), s.ios, delim) end +function readline(s::IOStream, chomp::Bool=false) + String(ccall(:jl_readline, Array{UInt8,1}, (Ptr{Void}, Cint), s.ios, chomp)) +end + function readbytes_all!(s::IOStream, b::Array{UInt8}, nb) olb = lb = length(b) nr = 0 diff --git a/src/support/ios.c b/src/support/ios.c index 650a8690c0888..05f18e29a3f2b 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -831,6 +831,66 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) return total; } +//Copy until '\r', '\n' or '\r\n' +size_t ios_copyline(ios_t *to, ios_t *from, int chomp) +{ + size_t nchomp = 0; + size_t total = 0, avail = (size_t)(from->size - from->bpos); + size_t ntowrite; + while (!ios_eof(from)) { + if (avail == 0) { + avail = ios_readprep(from, LINE_CHUNK_SIZE); + if (avail == 0) + break; + } + size_t written; + + char *r = NULL; + char *n = NULL; + + for (size_t i = 0; i < avail; i++){ + char *p = (char*)from->buf+from->bpos+i; + char ch = from->buf[from->bpos+i]; + + if (ch == '\n'){ + n = p; + if (chomp) nchomp = 1; + ntowrite = n - (from->buf+from->bpos) + 1 - nchomp; + break; + } + if (ch == '\r'){ + r = p; + if (chomp) nchomp = 1; + ntowrite = r - (from->buf+from->bpos) + 1 - nchomp; + if (i <= avail){ + char ch2 = from->buf[from->bpos+i+1]; + if (ch2 == '\n'){ + if (chomp) nchomp = 2; + ntowrite = r - (from->buf+from->bpos) + 2 - nchomp; + } + } + break; + } + } + + if (r == NULL && n == NULL) { + written = ios_write(to, from->buf+from->bpos, avail); + from->bpos += avail; + total += written; + avail = 0; + } + else { + written = ios_write(to, from->buf+from->bpos, ntowrite); + from->bpos += ntowrite + nchomp; + total += written; + return total; + } + } + from->_eof = 1; + return total; +} + + static void _ios_init(ios_t *s) { // put all fields in a sane initial state diff --git a/src/support/ios.h b/src/support/ios.h index 6acd52fb01838..f39e48ce81de3 100644 --- a/src/support/ios.h +++ b/src/support/ios.h @@ -95,6 +95,7 @@ JL_DLLEXPORT void ios_set_readonly(ios_t *s); JL_DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); JL_DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim); +JL_DLLEXPORT size_t ios_copyline(ios_t *to, ios_t *from, int chomp); // ensure at least n bytes are buffered if possible. returns # available. JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); diff --git a/src/sys.c b/src/sys.c index 7d685181e628b..902ee82583716 100644 --- a/src/sys.c +++ b/src/sys.c @@ -281,6 +281,27 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim) return (jl_value_t*)a; } +JL_DLLEXPORT jl_value_t *jl_readline(ios_t *s, int chomp) +{ + jl_array_t *a; + a = jl_alloc_array_1d(jl_array_uint8_type, 80); + ios_t dest; + ios_mem(&dest, 0); + ios_setbuf(&dest, (char*)a->data, 80, 0); + size_t n = ios_copyline(&dest, s, chomp); + if (dest.buf != a->data) { + a = jl_take_buffer(&dest); + } + else { +#ifdef STORE_ARRAY_LEN + a->length = n; +#endif + a->nrows = n; + ((char*)a->data)[n] = '\0'; + } + return (jl_value_t*)a; +} + JL_DLLEXPORT uint64_t jl_ios_get_nbyte_int(ios_t *s, const size_t n) { assert(n <= 8); From 80c94b8af60ff4aacf1dd23af78bbe03919bd645 Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sat, 7 Jan 2017 20:05:44 +0200 Subject: [PATCH 04/10] Minor fixes according to comments --- base/io.jl | 14 +++++++------- src/support/ios.c | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/base/io.jl b/base/io.jl index 163e8a80c5304..03dc537526d6e 100644 --- a/base/io.jl +++ b/base/io.jl @@ -542,10 +542,10 @@ readstring(filename::AbstractString) = open(readstring, filename) type EachLine stream::IO - ondone::Function chomp::Bool - EachLine(stream, chomp) = EachLine(stream, ()->nothing, chomp) - EachLine(stream, ondone, chomp) = new(stream, ondone, chomp) + ondone::Function + EachLine(stream, chomp) = EachLine(stream, chomp, ()->nothing) + EachLine(stream, chomp, ondone) = new(stream, chomp, ondone) end """ @@ -555,12 +555,12 @@ end Create an iterable object that will yield each line from an I/O stream or a file. The text is assumed to be encoded in UTF-8. """ -eachline(stream::IO, chomp::Bool = false) = EachLine(stream, chomp) +eachline(stream::IO, chomp::Bool=false) = EachLine(stream, chomp) -function eachline(filename::AbstractString, chomp::Bool = false) +function eachline(filename::AbstractString, chomp::Bool=false) s = open(filename) - EachLine(s, ()->close(s), chomp) + EachLine(s, chomp, ()->close(s)) end start(itr::EachLine) = nothing @@ -575,7 +575,7 @@ end next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp), nothing) eltype(::Type{EachLine}) = String -readlines(s=STDIN, chomp = false) = collect(eachline(s, chomp)) +readlines(s=STDIN, chomp::Bool=false) = collect(eachline(s, chomp)) iteratorsize(::Type{EachLine}) = SizeUnknown() diff --git a/src/support/ios.c b/src/support/ios.c index 05f18e29a3f2b..dc6048a950c53 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -831,7 +831,7 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) return total; } -//Copy until '\r', '\n' or '\r\n' +// Copy until '\r', '\n' or '\r\n' size_t ios_copyline(ios_t *to, ios_t *from, int chomp) { size_t nchomp = 0; @@ -851,7 +851,7 @@ size_t ios_copyline(ios_t *to, ios_t *from, int chomp) for (size_t i = 0; i < avail; i++){ char *p = (char*)from->buf+from->bpos+i; char ch = from->buf[from->bpos+i]; - + if (ch == '\n'){ n = p; if (chomp) nchomp = 1; @@ -861,8 +861,8 @@ size_t ios_copyline(ios_t *to, ios_t *from, int chomp) if (ch == '\r'){ r = p; if (chomp) nchomp = 1; - ntowrite = r - (from->buf+from->bpos) + 1 - nchomp; - if (i <= avail){ + ntowrite = r - (from->buf+from->bpos) + 1 - nchomp; + if (i < avail){ char ch2 = from->buf[from->bpos+i+1]; if (ch2 == '\n'){ if (chomp) nchomp = 2; From e3cfa28a4c733e7ef0dda3bd9a952ead41aa205a Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sat, 7 Jan 2017 23:50:26 +0200 Subject: [PATCH 05/10] Add docstrings for readline --- base/io.jl | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/base/io.jl b/base/io.jl index 03dc537526d6e..b533db776e8a1 100644 --- a/base/io.jl +++ b/base/io.jl @@ -166,13 +166,17 @@ The text is assumed to be encoded in UTF-8. """ readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename) +If true, newline characters and character combinations are stripped from the result; otherwise, newline characters or character combinations are preserved. + """ - readline(stream::IO=STDIN) - readline(filename::AbstractString) + readline(stream::IO=STDIN, chomp::Bool=false) + readline(filename::AbstractString, chomp::Bool=false) -Read a single line of text, including a trailing newline character (if one is reached before -the end of the input), from the given I/O stream or file (defaults to `STDIN`). -When reading from a file, the text is assumed to be encoded in UTF-8. +Read a single line of text including from the given I/O stream or file (defaults to `STDIN`). +Lines in the input can end in '\n', '\r', or '\r\n'. When reading from a file, the text is +assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included +in the output (if reached before the end of the input); otherwise newline characters(s) +are stripped from result. """ function readline(filename::AbstractString, chomp = false) open(filename) do f @@ -182,13 +186,16 @@ end """ - readlines(stream::IO) - readlines(filename::AbstractString) + readlines(stream::IO, chomp::Bool=false) + readlines(filename::AbstractString, chomp::Bool=false) -Read all lines of an I/O stream or a file as a vector of strings. -The text is assumed to be encoded in UTF-8. +Read all lines (delimited ) of an I/O stream or a file as a vector of strings. +Lines in the input can end in '\n', '\r', or '\r\n'. +The text is assumed to be encoded in UTF-8. If `chomp=false` +trailing newline character(s) will be included in the output; +otherwise newline characters(s) are stripped from result. """ -function readlines(filename::AbstractString, chomp = false) +function readlines(filename::AbstractString, chomp::Bool=false) open(filename) do f readlines(f, chomp) end @@ -549,11 +556,14 @@ type EachLine end """ - eachline(stream::IO) - eachline(filename::AbstractString) + eachline(stream::IO, chomp::Bool=false) + eachline(filename::AbstractString, chomp::Bool=false) Create an iterable object that will yield each line from an I/O stream or a file. -The text is assumed to be encoded in UTF-8. +Lines in the input can end in '\n', '\r', or '\r\n'. +The text is assumed to be encoded in UTF-8. If `chomp=false` +trailing newline character(s) will be included in the output; +otherwise newline characters(s) are stripped from result. """ eachline(stream::IO, chomp::Bool=false) = EachLine(stream, chomp) @@ -575,7 +585,7 @@ end next(itr::EachLine, nada) = (readline(itr.stream, itr.chomp), nothing) eltype(::Type{EachLine}) = String -readlines(s=STDIN, chomp::Bool=false) = collect(eachline(s, chomp)) +readlines(s::IO=STDIN, chomp::Bool=false) = collect(eachline(s, chomp)) iteratorsize(::Type{EachLine}) = SizeUnknown() From cffdf0de298a3ea2d6192d4482f1ba9ac46fcbc9 Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sun, 8 Jan 2017 00:14:03 +0200 Subject: [PATCH 06/10] Make chomp remove \r, update docstrings --- base/io.jl | 8 +++----- base/strings/util.jl | 15 +++++++++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/base/io.jl b/base/io.jl index b533db776e8a1..f5adc720c19cb 100644 --- a/base/io.jl +++ b/base/io.jl @@ -166,14 +166,12 @@ The text is assumed to be encoded in UTF-8. """ readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename) -If true, newline characters and character combinations are stripped from the result; otherwise, newline characters or character combinations are preserved. - """ readline(stream::IO=STDIN, chomp::Bool=false) readline(filename::AbstractString, chomp::Bool=false) Read a single line of text including from the given I/O stream or file (defaults to `STDIN`). -Lines in the input can end in '\n', '\r', or '\r\n'. When reading from a file, the text is +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. When reading from a file, the text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output (if reached before the end of the input); otherwise newline characters(s) are stripped from result. @@ -190,7 +188,7 @@ end readlines(filename::AbstractString, chomp::Bool=false) Read all lines (delimited ) of an I/O stream or a file as a vector of strings. -Lines in the input can end in '\n', '\r', or '\r\n'. +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. The text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output; otherwise newline characters(s) are stripped from result. @@ -560,7 +558,7 @@ end eachline(filename::AbstractString, chomp::Bool=false) Create an iterable object that will yield each line from an I/O stream or a file. -Lines in the input can end in '\n', '\r', or '\r\n'. +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. The text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output; otherwise newline characters(s) are stripped from result. diff --git a/base/strings/util.jl b/base/strings/util.jl index cf6b630191a24..2df38d4c630df 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -77,18 +77,23 @@ chop(s::AbstractString) = SubString(s, 1, endof(s)-1) chomp(s::AbstractString) Remove a single trailing newline from a string. +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. """ function chomp(s::AbstractString) i = endof(s) - if (i < 1 || s[i] != '\n') return SubString(s, 1, i) end + if (i < 1 || (s[i] != '\n' && s[i] != '\r')) return SubString(s, 1, i) end + if (s[i] == '\r') return SubString(s, 1, i-1) end j = prevind(s,i) - if (j < 1 || s[j] != '\r') return SubString(s, 1, i-1) end + if (j < 1 || (s[j] != '\r' && s[i] == '\n')) return SubString(s, 1, i-1) end return SubString(s, 1, j-1) end + function chomp(s::String) i = endof(s) - if i < 1 || s.data[i] != 0x0a + if i < 1 || (s.data[i] != 0x0a && s.data[i] != 0x0d) SubString(s, 1, i) + elseif s.data[i] == 0x0d + SubString(s, 1, i-1) elseif i < 2 || s.data[i-1] != 0x0d SubString(s, 1, i-1) else @@ -98,9 +103,11 @@ end # NOTE: use with caution -- breaks the immutable string convention! function chomp!(s::String) - if !isempty(s) && s.data[end] == 0x0a + if !isempty(s) && s.data[end] == 0x0a n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2 ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n) + elseif s.data[end] == 0x0d + ccall(:jl_array_del_end, Void, (Any, UInt), s.data, 1) end return s end From 603baae0e9a4b77365354a6372692b9d1d208e7a Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sun, 8 Jan 2017 00:25:11 +0200 Subject: [PATCH 07/10] Tweak docstring --- base/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index f5adc720c19cb..f5b23d1c9c3b2 100644 --- a/base/io.jl +++ b/base/io.jl @@ -187,7 +187,7 @@ end readlines(stream::IO, chomp::Bool=false) readlines(filename::AbstractString, chomp::Bool=false) -Read all lines (delimited ) of an I/O stream or a file as a vector of strings. +Read all lines of an I/O stream or a file as a vector of strings. Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. The text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output; From 7e9ceddbd12c43d4674d4b8b2a23f12c8cd64e1b Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sun, 8 Jan 2017 00:37:02 +0200 Subject: [PATCH 08/10] Add test for readlines(s::IO) --- test/iobuffer.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index 53b7eb4df806b..4e14feb12768d 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -74,6 +74,20 @@ Base.compact(io) @test_throws ArgumentError seek(io,0) @test_throws ArgumentError truncate(io,0) @test readline(io) == "whipped cream\n" +@test write(io,"pancakes\r\nwaffles\n\rblueberries\r") > 0 +@test readline(io) == "pancakes\r\n" +@test readline(io) == "waffles\n" +@test readline(io) == "\r" +@test readline(io) == "blueberries\r" +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readline(io, true) == "pancakes" +@test readline(io, true) == "waffles" +@test readline(io, true) == "" +@test readline(io, true) == "blueberries" +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readlines(io) == String["pancakes\r\n","waffles\n","\r","blueberries\r"] +write(io,"pancakes\r\nwaffles\n\rblueberries\r") +@test readlines(io, true) == String["pancakes","waffles","","blueberries"] Base.compact(io) @test position(io) == 0 @test ioslength(io) == 0 From bb7475e155e4c6980d5f4b1d8cb46620a356f2dc Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sun, 8 Jan 2017 00:57:00 +0200 Subject: [PATCH 09/10] Add tests for readlines and chomp --- test/read.jl | 12 ++++++++++++ test/strings/util.jl | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/test/read.jl b/test/read.jl index d753c8bdd33b2..fccfe38d73c2d 100644 --- a/test/read.jl +++ b/test/read.jl @@ -7,6 +7,7 @@ tasks = [] # Create test file filename = joinpath(dir, "file.txt") text = "C1,C2\n1,2\na,b\n" +text2 = "line1\rline2\nline\r\nline4" # List of IO producers l = Vector{Tuple{AbstractString,Function}}() @@ -44,6 +45,10 @@ s = io(text) close(s) push!(l, ("IOBuffer", io)) +# Readlines +write(filename, text2) +readlines(filename) == String["line1\r","line2\n","line\r\n","line4"] +readlines(filename, true) == String["line1","line2","line","line4"] function run_test_server(srv, text) push!(tasks, @async begin @@ -243,12 +248,19 @@ for (name, f) in l verbose && println("$name readline...") @test readline(io()) == readline(IOBuffer(text)) @test readline(io()) == readline(filename) + @test readline(io(), true) == readline(IOBuffer(text), true) + @test readline(io(), true) == readline(filename, true) verbose && println("$name readlines...") @test readlines(io()) == readlines(IOBuffer(text)) @test readlines(io()) == readlines(filename) + @test readlines(io(), true) == readlines(IOBuffer(text), true) + @test readlines(io(), true) == readlines(filename, true) + @test collect(eachline(io())) == collect(eachline(IOBuffer(text))) @test collect(eachline(io())) == collect(eachline(filename)) + @test collect(eachline(io(), true)) == collect(eachline(IOBuffer(text), true)) + @test collect(eachline(io(), true)) == collect(eachline(filename, true)) cleanup() diff --git a/test/strings/util.jl b/test/strings/util.jl index 21072a524bf4c..66d850a640616 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -213,6 +213,12 @@ end @test chop("fooε") == "foo" @test isa(chomp("foo"), SubString) @test isa(chop("foo"), SubString) +@test chomp("foo\r\n") == "foo" +@test chomp("foo\r") == "foo" +@test chomp("foo\r\r") == "foo\r" +@test Base.chomp!("foo\r\n") == "foo" +@test Base.chomp!("foo\r") == "foo" +@test Base.chomp!("foo\r\r") == "foo\r" # bytes2hex and hex2bytes hex_str = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" From 3538affaac432040813f2720d6ddbd6783676e71 Mon Sep 17 00:00:00 2001 From: Matti Pastell Date: Sun, 8 Jan 2017 01:02:16 +0200 Subject: [PATCH 10/10] Fix trailing whitespace --- base/io.jl | 10 +++++----- base/strings/util.jl | 4 ++-- test/read.jl | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/io.jl b/base/io.jl index f5b23d1c9c3b2..1906a7aa40d44 100644 --- a/base/io.jl +++ b/base/io.jl @@ -171,9 +171,9 @@ readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), readline(filename::AbstractString, chomp::Bool=false) Read a single line of text including from the given I/O stream or file (defaults to `STDIN`). -Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. When reading from a file, the text is +Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. When reading from a file, the text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included -in the output (if reached before the end of the input); otherwise newline characters(s) +in the output (if reached before the end of the input); otherwise newline characters(s) are stripped from result. """ function readline(filename::AbstractString, chomp = false) @@ -189,7 +189,7 @@ end Read all lines of an I/O stream or a file as a vector of strings. Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. -The text is assumed to be encoded in UTF-8. If `chomp=false` +The text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output; otherwise newline characters(s) are stripped from result. """ @@ -466,7 +466,7 @@ function readline(s::IO, chomp::Bool=false) out = UInt8[] while !eof(s) c = read(s, UInt8) - if c == 0x0d + if c == 0x0d !chomp && push!(out, c) if !eof(s) && Base.peek(s) == 0x0a c = read(s, UInt8) @@ -559,7 +559,7 @@ end Create an iterable object that will yield each line from an I/O stream or a file. Lines in the input can end in `'\\n'`, `'\\r'`, or `'\\r\\n'`. -The text is assumed to be encoded in UTF-8. If `chomp=false` +The text is assumed to be encoded in UTF-8. If `chomp=false` trailing newline character(s) will be included in the output; otherwise newline characters(s) are stripped from result. """ diff --git a/base/strings/util.jl b/base/strings/util.jl index 2df38d4c630df..5833b79b57b65 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -92,7 +92,7 @@ function chomp(s::String) i = endof(s) if i < 1 || (s.data[i] != 0x0a && s.data[i] != 0x0d) SubString(s, 1, i) - elseif s.data[i] == 0x0d + elseif s.data[i] == 0x0d SubString(s, 1, i-1) elseif i < 2 || s.data[i-1] != 0x0d SubString(s, 1, i-1) @@ -103,7 +103,7 @@ end # NOTE: use with caution -- breaks the immutable string convention! function chomp!(s::String) - if !isempty(s) && s.data[end] == 0x0a + if !isempty(s) && s.data[end] == 0x0a n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2 ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n) elseif s.data[end] == 0x0d diff --git a/test/read.jl b/test/read.jl index fccfe38d73c2d..ab4779ce9b492 100644 --- a/test/read.jl +++ b/test/read.jl @@ -256,7 +256,7 @@ for (name, f) in l @test readlines(io()) == readlines(filename) @test readlines(io(), true) == readlines(IOBuffer(text), true) @test readlines(io(), true) == readlines(filename, true) - + @test collect(eachline(io())) == collect(eachline(IOBuffer(text))) @test collect(eachline(io())) == collect(eachline(filename)) @test collect(eachline(io(), true)) == collect(eachline(IOBuffer(text), true))