Skip to content

Commit 6fafd2e

Browse files
committed
Add LZ4 compressor with Java9 perf improvements (elastic#77153)
Java9 added a number of features that are useful to improve compression and decompression. These include the Arrays#mismatch method and VarHandles. This commit adds compression tools forked from the java-lz4 library which include these improvements. We hope to contribute these changes back to the original project, however the project currently supports Java7 so this is not possible at the moment.
1 parent 76859ec commit 6fafd2e

File tree

24 files changed

+3720
-13
lines changed

24 files changed

+3720
-13
lines changed

build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/LicenseHeadersTask.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public void setExcludes(List<String> excludes) {
110110
* Allowed license families for this project.
111111
*/
112112
@Input
113-
private List<String> approvedLicenses = new ArrayList<String>(Arrays.asList("SSPL+Elastic License", "Generated", "Vendored"));
113+
private List<String> approvedLicenses = new ArrayList<String>(Arrays.asList("SSPL+Elastic License", "Generated", "Vendored", "Apache LZ4-Java"));
114114
/**
115115
* Files that should be excluded from the license header check. Use with extreme care, only in situations where the license on the
116116
* source file is compatible with the codebase but we do not want to add the license to the list of approved headers (to avoid the
@@ -154,6 +154,8 @@ public void runRat() {
154154
matchers.add(subStringMatcher("BSD4 ", "Original BSD License (with advertising clause)", "All advertising materials"));
155155
// Apache
156156
matchers.add(subStringMatcher("AL ", "Apache", "Licensed to Elasticsearch B.V. under one or more contributor"));
157+
// Apache lz4-java
158+
matchers.add(subStringMatcher("ALLZ4", "Apache LZ4-Java", "Copyright 2020 Adrien Grand and the lz4-java contributors"));
157159
// Generated resources
158160
matchers.add(subStringMatcher("GEN ", "Generated", "ANTLR GENERATED CODE"));
159161
// Vendored Code

build-tools-internal/src/main/groovy/elasticsearch.formatting.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def projectPathsToExclude = [
5555
':libs:elasticsearch-dissect',
5656
':libs:elasticsearch-geo',
5757
':libs:elasticsearch-grok',
58+
':libs:elasticsearch-lz4',
5859
':libs:elasticsearch-nio',
5960
':libs:elasticsearch-plugin-classloader',
6061
':libs:elasticsearch-secure-sm',

libs/lz4/build.gradle

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
apply plugin: 'elasticsearch.publish'
9+
10+
dependencies {
11+
api 'org.lz4:lz4-java:1.8.0'
12+
api project(':libs:elasticsearch-core')
13+
14+
testImplementation(project(":test:framework")) {
15+
exclude group: 'org.elasticsearch', module: 'elasticsearch-lz4'
16+
}
17+
}
18+
19+
tasks.named("forbiddenPatterns").configure {
20+
exclude '**/*.binary'
21+
}
22+
23+
tasks.named('forbiddenApisMain').configure {
24+
// lz4 does not depend on core, so only jdk signatures should be checked
25+
replaceSignatureFiles 'jdk-signatures'
26+
}
27+
28+
tasks.named("thirdPartyAudit").configure {
29+
ignoreViolations(
30+
// from java-lz4
31+
'net.jpountz.util.UnsafeUtils'
32+
)
33+
}
File renamed without changes.
File renamed without changes.
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/*
2+
* Copyright 2020 Adrien Grand and the lz4-java contributors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
*/
17+
package org.elasticsearch.lz4;
18+
19+
import net.jpountz.lz4.LZ4Compressor;
20+
import net.jpountz.lz4.LZ4Exception;
21+
22+
import java.nio.ByteBuffer;
23+
import java.util.Arrays;
24+
25+
/**
26+
* This file is forked from https://github.com/lz4/lz4-java. In particular, it forks the following file
27+
* net.jpountz.lz4.LZ4JavaSafeCompressor.
28+
*
29+
* It modifies the original implementation to use custom LZ4SafeUtils and SafeUtils implementations which
30+
* include performance improvements. Additionally, instead of allocating a new hashtable for each compress
31+
* call, it reuses thread-local hashtables. Comments are included to mark the changes.
32+
*/
33+
public class ESLZ4Compressor extends LZ4Compressor {
34+
35+
// Modified to add thread-local hash tables
36+
private static final ThreadLocal<short[]> sixtyFourKBHashTable = ThreadLocal.withInitial(() -> new short[8192]);
37+
private static final ThreadLocal<int[]> biggerHashTable = ThreadLocal.withInitial(() -> new int[4096]);
38+
39+
public static final LZ4Compressor INSTANCE = new ESLZ4Compressor();
40+
41+
ESLZ4Compressor() {
42+
}
43+
44+
static int compress64k(byte[] src, int srcOff, int srcLen, byte[] dest, int destOff, int destEnd) {
45+
int srcEnd = srcOff + srcLen;
46+
int srcLimit = srcEnd - 5;
47+
int mflimit = srcEnd - 12;
48+
int dOff = destOff;
49+
int anchor = srcOff;
50+
if (srcLen >= 13) {
51+
// Modified to use thread-local hash table
52+
short[] hashTable = sixtyFourKBHashTable.get();
53+
Arrays.fill(hashTable, (short) 0);
54+
int sOff = srcOff + 1;
55+
56+
label53:
57+
while(true) {
58+
int forwardOff = sOff;
59+
int step = 1;
60+
int var16 = 1 << LZ4Constants.SKIP_STRENGTH;
61+
62+
int ref;
63+
int excess;
64+
do {
65+
sOff = forwardOff;
66+
forwardOff += step;
67+
step = var16++ >>> LZ4Constants.SKIP_STRENGTH;
68+
if (forwardOff > mflimit) {
69+
break label53;
70+
}
71+
72+
excess = LZ4Utils.hash64k(SafeUtils.readInt(src, sOff));
73+
ref = srcOff + SafeUtils.readShort(hashTable, excess);
74+
SafeUtils.writeShort(hashTable, excess, sOff - srcOff);
75+
// Modified to use explicit == false
76+
} while(LZ4SafeUtils.readIntEquals(src, ref, sOff) == false);
77+
78+
excess = LZ4SafeUtils.commonBytesBackward(src, ref, sOff, srcOff, anchor);
79+
sOff -= excess;
80+
ref -= excess;
81+
int runLen = sOff - anchor;
82+
int tokenOff = dOff++;
83+
if (dOff + runLen + 8 + (runLen >>> 8) > destEnd) {
84+
throw new LZ4Exception("maxDestLen is too small");
85+
}
86+
87+
if (runLen >= 15) {
88+
SafeUtils.writeByte(dest, tokenOff, 240);
89+
dOff = LZ4SafeUtils.writeLen(runLen - 15, dest, dOff);
90+
} else {
91+
SafeUtils.writeByte(dest, tokenOff, runLen << 4);
92+
}
93+
94+
LZ4SafeUtils.wildArraycopy(src, anchor, dest, dOff, runLen);
95+
dOff += runLen;
96+
97+
while(true) {
98+
SafeUtils.writeShortLE(dest, dOff, (short)(sOff - ref));
99+
dOff += 2;
100+
sOff += 4;
101+
ref += 4;
102+
int matchLen = LZ4SafeUtils.commonBytes(src, ref, sOff, srcLimit);
103+
if (dOff + 6 + (matchLen >>> 8) > destEnd) {
104+
throw new LZ4Exception("maxDestLen is too small");
105+
}
106+
107+
sOff += matchLen;
108+
if (matchLen >= 15) {
109+
SafeUtils.writeByte(dest, tokenOff, SafeUtils.readByte(dest, tokenOff) | 15);
110+
dOff = LZ4SafeUtils.writeLen(matchLen - 15, dest, dOff);
111+
} else {
112+
SafeUtils.writeByte(dest, tokenOff, SafeUtils.readByte(dest, tokenOff) | matchLen);
113+
}
114+
115+
if (sOff > mflimit) {
116+
anchor = sOff;
117+
break label53;
118+
}
119+
120+
SafeUtils.writeShort(hashTable, LZ4Utils.hash64k(SafeUtils.readInt(src, sOff - 2)), sOff - 2 - srcOff);
121+
int h = LZ4Utils.hash64k(SafeUtils.readInt(src, sOff));
122+
ref = srcOff + SafeUtils.readShort(hashTable, h);
123+
SafeUtils.writeShort(hashTable, h, sOff - srcOff);
124+
// Modified to use explicit == false
125+
if (LZ4SafeUtils.readIntEquals(src, sOff, ref) == false) {
126+
anchor = sOff++;
127+
break;
128+
}
129+
130+
tokenOff = dOff++;
131+
SafeUtils.writeByte(dest, tokenOff, 0);
132+
}
133+
}
134+
}
135+
136+
dOff = LZ4SafeUtils.lastLiterals(src, anchor, srcEnd - anchor, dest, dOff, destEnd);
137+
return dOff - destOff;
138+
}
139+
140+
public int compress(byte[] src, int srcOff, int srcLen, byte[] dest, int destOff, int maxDestLen) {
141+
SafeUtils.checkRange(src, srcOff, srcLen);
142+
SafeUtils.checkRange(dest, destOff, maxDestLen);
143+
int destEnd = destOff + maxDestLen;
144+
if (srcLen < 65547) {
145+
return compress64k(src, srcOff, srcLen, dest, destOff, destEnd);
146+
} else {
147+
int srcEnd = srcOff + srcLen;
148+
int srcLimit = srcEnd - 5;
149+
int mflimit = srcEnd - 12;
150+
int dOff = destOff;
151+
int sOff = srcOff + 1;
152+
int anchor = srcOff;
153+
// Modified to use thread-local hash table
154+
int[] hashTable = biggerHashTable.get();
155+
Arrays.fill(hashTable, srcOff);
156+
157+
label63:
158+
while(true) {
159+
int forwardOff = sOff;
160+
int step = 1;
161+
int var18 = 1 << LZ4Constants.SKIP_STRENGTH;
162+
163+
while(true) {
164+
sOff = forwardOff;
165+
forwardOff += step;
166+
step = var18++ >>> LZ4Constants.SKIP_STRENGTH;
167+
if (forwardOff <= mflimit) {
168+
int excess = LZ4Utils.hash(SafeUtils.readInt(src, sOff));
169+
int ref = SafeUtils.readInt(hashTable, excess);
170+
int back = sOff - ref;
171+
SafeUtils.writeInt(hashTable, excess, sOff);
172+
// Modified to use explicit == false
173+
if (back >= 65536 || LZ4SafeUtils.readIntEquals(src, ref, sOff) == false) {
174+
continue;
175+
}
176+
177+
excess = LZ4SafeUtils.commonBytesBackward(src, ref, sOff, srcOff, anchor);
178+
sOff -= excess;
179+
ref -= excess;
180+
int runLen = sOff - anchor;
181+
int tokenOff = dOff++;
182+
if (dOff + runLen + 8 + (runLen >>> 8) > destEnd) {
183+
throw new LZ4Exception("maxDestLen is too small");
184+
}
185+
186+
if (runLen >= 15) {
187+
SafeUtils.writeByte(dest, tokenOff, 240);
188+
dOff = LZ4SafeUtils.writeLen(runLen - 15, dest, dOff);
189+
} else {
190+
SafeUtils.writeByte(dest, tokenOff, runLen << 4);
191+
}
192+
193+
LZ4SafeUtils.wildArraycopy(src, anchor, dest, dOff, runLen);
194+
dOff += runLen;
195+
196+
while(true) {
197+
SafeUtils.writeShortLE(dest, dOff, back);
198+
dOff += 2;
199+
sOff += 4;
200+
int matchLen = LZ4SafeUtils.commonBytes(src, ref + 4, sOff, srcLimit);
201+
if (dOff + 6 + (matchLen >>> 8) > destEnd) {
202+
throw new LZ4Exception("maxDestLen is too small");
203+
}
204+
205+
sOff += matchLen;
206+
if (matchLen >= 15) {
207+
SafeUtils.writeByte(dest, tokenOff, SafeUtils.readByte(dest, tokenOff) | 15);
208+
dOff = LZ4SafeUtils.writeLen(matchLen - 15, dest, dOff);
209+
} else {
210+
SafeUtils.writeByte(dest, tokenOff, SafeUtils.readByte(dest, tokenOff) | matchLen);
211+
}
212+
213+
if (sOff > mflimit) {
214+
anchor = sOff;
215+
break;
216+
}
217+
218+
SafeUtils.writeInt(hashTable, LZ4Utils.hash(SafeUtils.readInt(src, sOff - 2)), sOff - 2);
219+
int h = LZ4Utils.hash(SafeUtils.readInt(src, sOff));
220+
ref = SafeUtils.readInt(hashTable, h);
221+
SafeUtils.writeInt(hashTable, h, sOff);
222+
back = sOff - ref;
223+
// Modified to use explicit == false
224+
if (back >= 65536 || LZ4SafeUtils.readIntEquals(src, ref, sOff) == false) {
225+
anchor = sOff++;
226+
continue label63;
227+
}
228+
229+
tokenOff = dOff++;
230+
SafeUtils.writeByte(dest, tokenOff, 0);
231+
}
232+
}
233+
234+
dOff = LZ4SafeUtils.lastLiterals(src, anchor, srcEnd - anchor, dest, dOff, destEnd);
235+
return dOff - destOff;
236+
}
237+
}
238+
}
239+
}
240+
241+
@Override
242+
public int compress(ByteBuffer src, int srcOff, int srcLen, ByteBuffer dest, int destOff, int maxDestLen) {
243+
if (src.hasArray() && dest.hasArray()) {
244+
return this.compress(src.array(), srcOff + src.arrayOffset(), srcLen, dest.array(), destOff + dest.arrayOffset(), maxDestLen);
245+
} else {
246+
throw new AssertionError("Do not support compression on direct buffers");
247+
}
248+
}
249+
}

0 commit comments

Comments
 (0)