From 8a579bb8d4c6906a17857f35d03965a608a1ba52 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 10 Aug 2023 14:19:00 -0500 Subject: [PATCH 1/2] Update getline logic to match C version See commits: * a83ddbb7f036fd44b198364a0a8170ef5bf0f983 * eb322a9716a74dbbd6e1b45652395bda24533da5 --- ext/java/org/jruby/ext/stringio/StringIO.java | 57 ++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 7c1abb0..49de2c8 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -677,12 +677,7 @@ private IRubyObject getline(ThreadContext context, final IRubyObject rs, int lim } str = strioSubstr(runtime, ptr.pos, e - s - w, enc); } else if ((n = ((RubyString) rs).size()) == 0) { - // this is not an exact port; the original confused me - // in MRI, the next loop appears to have a complicated boolean to determine the index, but in actuality - // it just resolves to p (+ 0) as below. We theorize that the MRI logic may have originally been - // intended to skip all \n and \r, but because p does not get incremented before the \r check that - // logic never fires. We use the original logic that did not have these strange flaws. - // See https://github.com/ruby/ruby/commit/30540c567569d3486ccbf59b59d903d5778f04d5 + int paragraph_end = 0; p = s; while (stringBytes[p] == '\n') { if (++p == e) { @@ -691,22 +686,21 @@ private IRubyObject getline(ThreadContext context, final IRubyObject rs, int lim } s = p; while ((p = StringSupport.memchr(stringBytes, p, '\n', e - p)) != -1 && (p != e)) { - p += 1; - if (p == e) break; - - if (stringBytes[p] == '\n') { - e = p + 1; - w = (chomp ? 1 : 0); - break; + p++; + if (!((p < e && stringBytes[p] == '\n') || + (p + 1 < e && stringBytes[p] == '\r' && stringBytes[p+1] == '\n'))) { + continue; } - else if (stringBytes[p] == '\r' && p < e && stringBytes[p + 1] == '\n') { - e = p + 2; - w = (chomp ? 2 : 0); - break; + paragraph_end = p - ((stringBytes[p-2] == '\r') ? 2 : 1); + while ((p < e && stringBytes[p] == '\n') || + (p + 1 < e && stringBytes[p] == '\r' && stringBytes[p+1] == '\n')) { + p += (stringBytes[p] == '\r') ? 2 : 1; } + e = p; + break; } - if (w == 0 && chomp) { - w = chompNewlineWidth(stringBytes, s, e); + if (chomp && paragraph_end != 0) { + w = e - paragraph_end; } str = strioSubstr(runtime, s - begin, e - s - w, enc); } else if (n == 1) { @@ -718,17 +712,28 @@ else if (stringBytes[p] == '\r' && p < e && stringBytes[p + 1] == '\n') { } str = strioSubstr(runtime, ptr.pos, e - s - w, enc); } else { - if (n < e - s) { + if (n < e - s + (chomp ? 1 : 0)) { RubyString rsStr = (RubyString) rs; ByteList rsByteList = rsStr.getByteList(); byte[] rsBytes = rsByteList.getUnsafeBytes(); - int[] skip = new int[1 << CHAR_BIT]; - int pos; - p = rsByteList.getBegin(); - bm_init_skip(skip, rsBytes, p, n); - if ((pos = bm_search(rsBytes, p, n, stringBytes, s, e - s, skip)) >= 0) { - e = s + pos + n; + /* unless chomping, RS at the end does not matter */ + if (e - s < 1024 || n == e - s) { + for (p = s; p + n <= e; ++p) { + if (ByteList.memcmp(stringBytes, p, rsBytes, 0, n) == 0) { + e = p + n; + w = (chomp ? n : 0); + break; + } + } + } else { + int[] skip = new int[1 << CHAR_BIT]; + int pos; + p = rsByteList.getBegin(); + bm_init_skip(skip, rsBytes, p, n); + if ((pos = bm_search(rsBytes, p, n, stringBytes, s, e - s, skip)) >= 0) { + e = s + pos + n; + } } } str = strioSubstr(runtime, ptr.pos, e - s - w, enc); From 64b27711646b8d616b7a300e6cb1ad997a488dda Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 14 Aug 2023 17:27:43 -0500 Subject: [PATCH 2/2] Additional fixes for chomp with nil sep See ruby/stringio@feaa2ec63176c2a9d77bda354ef68f3e7eb66e1a --- ext/java/org/jruby/ext/stringio/StringIO.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 49de2c8..1628645 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -599,6 +599,8 @@ public IRubyObject getline(ThreadContext context, StringIO self, IRubyObject rs, return RubyString.newEmptyString(context.runtime, self.getEncoding()); } + if (rs.isNil()) chomp = false; + IRubyObject result = self.getline(context, rs, limit, chomp); context.setLastLine(result); @@ -616,6 +618,8 @@ public StringIO getline(ThreadContext context, StringIO self, IRubyObject rs, in throw context.runtime.newArgumentError("invalid limit: 0 for each_line"); } + if (rs.isNil()) chomp = false; + while (!(line = self.getline(context, rs, limit, chomp)).isNil()) { block.yieldSpecific(context, line); } @@ -634,6 +638,8 @@ public RubyArray getline(ThreadContext context, StringIO self, IRubyObject rs, i throw context.runtime.newArgumentError("invalid limit: 0 for readlines"); } + if (rs.isNil()) chomp = false; + while (!(line = self.getline(context, rs, limit, chomp)).isNil()) { ary.append(line); }