From b26fcae8d952044913008ff145571058ea2856ca Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 18 Apr 2018 14:40:09 +0200 Subject: [PATCH 1/2] Avoid BytesRef's copying in ScriptDocValues's Strings This commit refactors ScriptDocValues.Strings to directly creates String objects instead of using an intermediate BytesRef's copy. ScriptDocValues.Binary is also changed to create a single copy of BytesRef per consumed value. Relates #29567 --- .../index/fielddata/ScriptDocValues.java | 123 ++++++++---------- 1 file changed, 56 insertions(+), 67 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 01f6bc192988c..6f2a3e0fee410 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -573,43 +573,42 @@ private static boolean[] grow(boolean[] array, int minSize) { } - abstract static class BinaryScriptDocValues extends ScriptDocValues { - + public static final class Strings extends ScriptDocValues { private final SortedBinaryDocValues in; - protected BytesRefBuilder[] values = new BytesRefBuilder[0]; - protected int count; + private String[] values = new String[0]; + private int count; - BinaryScriptDocValues(SortedBinaryDocValues in) { + public Strings(SortedBinaryDocValues in) { this.in = in; } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { - resize(in.docValueCount()); - for (int i = 0; i < count; i++) { - // We need to make a copy here, because BytesBinaryDVAtomicFieldData's SortedBinaryDocValues - // implementation reuses the returned BytesRef. Otherwise we would end up with the same BytesRef - // instance for all slots in the values array. - values[i].copyBytes(in.nextValue()); - } - } else { - resize(0); - } + public String get(int index) { + return values[index]; + } + + public String getValue() { + return count == 0 ? null : values[0]; } /** * Set the {@link #size()} and ensure that the {@link #values} array can * store at least that many entries. - */ - protected void resize(int newSize) { + */ + private void resize(int newSize) { + values = ArrayUtil.grow(values, newSize); count = newSize; - if (newSize > values.length) { - final int oldLength = values.length; - values = ArrayUtil.grow(values, count); - for (int i = oldLength; i < values.length; ++i) { - values[i] = new BytesRefBuilder(); + } + + @Override + public void setNextDocId(int docId) throws IOException { + if (in.advanceExact(docId)) { + resize(in.docValueCount()); + for (int i = 0; i < count; i++) { + values[i] = in.nextValue().utf8ToString(); } + } else { + resize(0); } } @@ -617,65 +616,55 @@ protected void resize(int newSize) { public int size() { return count; } - } - public static final class Strings extends BinaryScriptDocValues { + public static final class BytesRefs extends ScriptDocValues { + private final SortedBinaryDocValues in; + private BytesRef[] values = new BytesRef[0]; + private int count; - public Strings(SortedBinaryDocValues in) { - super(in); + public BytesRefs(SortedBinaryDocValues in) { + this.in = in; } @Override - public String get(int index) { - return values[index].get().utf8ToString(); - } - - public BytesRef getBytesValue() { - if (size() > 0) { - /** - * We need to make a copy here because {@link BinaryScriptDocValues} might reuse the - * returned value and the same instance might be used to - * return values from multiple documents. - **/ - return values[0].toBytesRef(); - } else { - return null; - } - } - - public String getValue() { - BytesRef value = getBytesValue(); - if (value == null) { - return null; + public void setNextDocId(int docId) throws IOException { + if (in.advanceExact(docId)) { + resize(in.docValueCount()); + for (int i = 0; i < count; i++) { + /** + * We need to make a copy here because {@link SortedBinaryDocValues} might reuse the returned value + * and the same instance might be used to return values from multiple documents. + **/ + values[i] = BytesRef.deepCopyOf(in.nextValue()); + } } else { - return value.utf8ToString(); + resize(0); } } - } - - public static final class BytesRefs extends BinaryScriptDocValues { - - public BytesRefs(SortedBinaryDocValues in) { - super(in); - } @Override public BytesRef get(int index) { - /** - * We need to make a copy here because {@link BinaryScriptDocValues} might reuse the - * returned value and the same instance might be used to - * return values from multiple documents. - **/ - return values[index].toBytesRef(); + return values[index]; } public BytesRef getValue() { - if (count == 0) { - return new BytesRef(); - } - return values[0].toBytesRef(); + return count == 0 ? new BytesRef() : values[0]; + } + + /** + * Set the {@link #size()} and ensure that the {@link #values} array can + * store at least that many entries. + */ + protected void resize(int newSize) { + values = ArrayUtil.grow(values, newSize); + count = newSize; + } + + @Override + public int size() { + return count; } } From 6d356f08fb4ce6d40c66d92e62d8768d442a058d Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 19 Apr 2018 16:01:11 +0200 Subject: [PATCH 2/2] copy values lazily in ScriptDocValues.Strings to avoid unnecessary utf8 conversion --- .../index/fielddata/ScriptDocValues.java | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 21beb0c4b728e..a7fd90bf85e9a 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -35,6 +35,7 @@ import org.joda.time.ReadableDateTime; import java.io.IOException; +import java.io.UncheckedIOException; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.AbstractList; @@ -570,13 +571,13 @@ private static boolean[] grow(boolean[] array, int minSize) { } else return array; } - } public static final class Strings extends ScriptDocValues { private final SortedBinaryDocValues in; private String[] values = new String[0]; private int count; + private boolean valuesSet = false; public Strings(SortedBinaryDocValues in) { this.in = in; @@ -584,6 +585,13 @@ public Strings(SortedBinaryDocValues in) { @Override public String get(int index) { + if (valuesSet == false) { + try { + fillValues(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } return values[index]; } @@ -600,15 +608,23 @@ private void resize(int newSize) { count = newSize; } + private void fillValues() throws IOException { + assert valuesSet == false; + resize(count); + for (int i = 0; i < count; i++) { + values[i] = in.nextValue().utf8ToString(); + } + valuesSet = true; + } + @Override public void setNextDocId(int docId) throws IOException { if (in.advanceExact(docId)) { - resize(in.docValueCount()); - for (int i = 0; i < count; i++) { - values[i] = in.nextValue().utf8ToString(); - } + count = in.docValueCount(); + valuesSet = false; } else { resize(0); + valuesSet = true; } }