Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/85008.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 85008
summary: "TSDB: shrink `_id` inverted index"
area: TSDB
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ generates a consistent id:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:52:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}}'
- match: {items.0.index._id: cZZNs4NdV58ePSPI8-3SGXkBAAA}
- match: {items.0.index._id: cZZNs4NdV58ePSPIAAABeRnS7fM}

- do:
bulk:
Expand All @@ -85,7 +85,7 @@ generates a consistent id:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:52:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}}'
- match: {items.0.index._id: cZZNs4NdV58ePSPI8-3SGXkBAAA}
- match: {items.0.index._id: cZZNs4NdV58ePSPIAAABeRnS7fM}

---
index a new document on top of an old one:
Expand Down Expand Up @@ -124,7 +124,7 @@ index a new document on top of an old one:
network:
tx: 111434595272
rx: 430605511
- match: {_id: cn4exTOUtxytuLkQZv7RGXkBAAA}
- match: {_id: cn4exTOUtxytuLkQAAABeRnR_mY}

- do:
search:
Expand Down Expand Up @@ -169,7 +169,7 @@ index a new document on top of an old one over bulk:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 111434595272, "rx": 430605511}}}}'
- match: {items.0.index._id: cn4exTOUtxytuLkQZv7RGXkBAAA}
- match: {items.0.index._id: cn4exTOUtxytuLkQAAABeRnR_mY}

- do:
search:
Expand All @@ -193,7 +193,7 @@ create operation on top of old document fails:
reason: id generation changed in 8.2

- do:
catch: "/\\[cn4exTOUtxytuLkQZv7RGXkBAAA\\]\\[\\{.+\\}\\@2021-04-28T18:51:03.142Z\\]: version conflict, document already exists \\(current version \\[1\\]\\)/"
catch: "/\\[cn4exTOUtxytuLkQAAABeRnR_mY\\]\\[\\{.+\\}\\@2021-04-28T18:51:03.142Z\\]: version conflict, document already exists \\(current version \\[1\\]\\)/"
index:
refresh: true
index: test
Expand Down Expand Up @@ -222,7 +222,7 @@ create operation on top of old document fails over bulk:
body:
- '{"create": {}}'
- '{"@timestamp": "2021-04-28T18:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 111434595272, "rx": 430605511}}}}'
- match: { items.0.create.error.reason: "[cn4exTOUtxytuLkQZv7RGXkBAAA][{k8s.pod.uid=df3145b3-0563-4d3b-a0f7-897eb2876ea9, metricset=pod}@2021-04-28T18:51:03.142Z]: version conflict, document already exists (current version [1])" }
- match: { items.0.create.error.reason: "[cn4exTOUtxytuLkQAAABeRnR_mY][{k8s.pod.uid=df3145b3-0563-4d3b-a0f7-897eb2876ea9, metricset=pod}@2021-04-28T18:51:03.142Z]: version conflict, document already exists (current version [1])" }

---
ids query:
Expand All @@ -238,12 +238,12 @@ ids query:
- field: k8s.pod.network.tx
query:
ids:
values: ["cn4exTOUtxytuLkQZv7RGXkBAAA", "cZZNs4NdV58ePSPIkwPSGXkBAAA"]
values: ["cn4exTOUtxytuLkQAAABeRnR_mY", "cZZNs4NdV58ePSPIAAABeRnSA5M"]
sort: ["@timestamp"]
- match: {hits.total.value: 2}
- match: {hits.hits.0._id: "cn4exTOUtxytuLkQZv7RGXkBAAA"}
- match: {hits.hits.0._id: "cn4exTOUtxytuLkQAAABeRnR_mY"}
- match: {hits.hits.0.fields.k8s\.pod\.network\.tx: [1434595272]}
- match: {hits.hits.1._id: "cZZNs4NdV58ePSPIkwPSGXkBAAA"}
- match: {hits.hits.1._id: "cZZNs4NdV58ePSPIAAABeRnSA5M"}
- match: {hits.hits.1.fields.k8s\.pod\.network\.tx: [2012916202]}

---
Expand All @@ -255,9 +255,9 @@ get:
- do:
get:
index: test
id: cZZNs4NdV58ePSPIkwPSGXkBAAA
id: cZZNs4NdV58ePSPIAAABeRnSA5M
- match: {_index: test}
- match: {_id: cZZNs4NdV58ePSPIkwPSGXkBAAA}
- match: {_id: cZZNs4NdV58ePSPIAAABeRnSA5M}
- match:
_source:
"@timestamp": "2021-04-28T18:51:04.467Z"
Expand Down Expand Up @@ -293,7 +293,7 @@ get with routing:
catch: bad_request
get:
index: test
id: cZZNs4NdV58ePSPIkwPSGXkBAAA
id: cZZNs4NdV58ePSPIAAABeRnSA5M
routing: routing

---
Expand All @@ -305,7 +305,7 @@ delete:
- do:
delete:
index: test
id: cZZNs4NdV58ePSPIkwPSGXkBAAA
id: cZZNs4NdV58ePSPIAAABeRnSA5M
- match: {result: deleted}

---
Expand Down Expand Up @@ -339,34 +339,12 @@ delete over _bulk:
version: " - 8.1.99"
reason: ids generation changed in 8.2

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:24.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2005177954, "rx": 801479970}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:44.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2006223737, "rx": 802337279}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:51:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.2", "network": {"tx": 2012916202, "rx": 803685721}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434521831, "rx": 530575198}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:23.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434577921, "rx": 530600088}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:53.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434587694, "rx": 530604797}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 1434595272, "rx": 530605511}}}}'

- do:
bulk:
index: test
body:
- '{"delete": {"_id": "cn4exTOUtxytuLkQBhTRGXkBAAA"}}'
- '{"delete": {"_id": "cZZNs4NdV58ePSPIkwPSGXkBAAA"}}'
- '{"delete": {"_id": "cn4exTOUtxytuLkQAAABeRnR_mY"}}'
- '{"delete": {"_id": "cZZNs4NdV58ePSPIAAABeRnSA5M"}}'
- '{"delete": {"_id": "not found ++ not found"}}'
- match: {items.0.delete.result: deleted}
- match: {items.1.delete.result: deleted}
Expand Down Expand Up @@ -416,7 +394,7 @@ routing_path matches deep object:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": {"bar": {"baz": {"uid": "uid1"}}}}}'
- match: {items.0.index.result: created}
- match: {items.0.index._id: OcEOGaxBa0saxogMMxnRGXkBAAA}
- match: {items.0.index._id: OcEOGaxBa0saxogMAAABeRnRGTM}

---
routing_path matches object:
Expand Down Expand Up @@ -457,4 +435,4 @@ routing_path matches object:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": {"uid": "uid1"}}}'
- match: {items.0.index.result: created}
- match: {items.0.index._id: 8bgiqUyQKH6n8noAMxnRGXkBAAA}
- match: {items.0.index._id: 8bgiqUyQKH6n8noAAAABeRnRGTM}
12 changes: 12 additions & 0 deletions server/src/main/java/org/elasticsearch/common/util/ByteUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ public enum ByteUtils {

public static final VarHandle LITTLE_ENDIAN_LONG = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);

public static final VarHandle BIG_ENDIAN_LONG = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.BIG_ENDIAN);

/** Zig-zag decode. */
public static long zigZagDecode(long n) {
return ((n >>> 1) ^ -(n & 1));
Expand All @@ -41,6 +43,16 @@ public static long readLongLE(byte[] arr, int offset) {
return (long) LITTLE_ENDIAN_LONG.get(arr, offset);
}

/**
 * Write a long in big-endian format.
 *
 * @param l      the value to write
 * @param arr    the destination byte array
 * @param offset the index in {@code arr} of the first (most significant) byte written
 */
public static void writeLongBE(long l, byte[] arr, int offset) {
BIG_ENDIAN_LONG.set(arr, offset, l);
}

/**
 * Read a long in big-endian format.
 *
 * @param arr    the source byte array
 * @param offset the index in {@code arr} of the first (most significant) byte to read
 * @return the long decoded from the bytes starting at {@code offset}
 */
public static long readLongBE(byte[] arr, int offset) {
return (long) BIG_ENDIAN_LONG.get(arr, offset);
}

/** Write an int in little-endian format. */
public static void writeIntLE(int l, byte[] arr, int offset) {
LITTLE_ENDIAN_INT.set(arr, offset, l);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public void createField(DocumentParserContext context, BytesRef tsid) {

byte[] suffix = new byte[16];
ByteUtils.writeLongLE(hash.h1, suffix, 0);
ByteUtils.writeLongLE(timestamp, suffix, 8); // TODO compare disk usage for LE and BE on timestamp
ByteUtils.writeLongBE(timestamp, suffix, 8); // Big Endian shrinks the inverted index by ~37%

IndexRouting.ExtractFromSource indexRouting = (IndexRouting.ExtractFromSource) context.indexSettings().getIndexRouting();
// TODO it'd be way faster to use the fields that we've extract here rather than the source or parse the tsid
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ private byte[] readLongLEHelper(long number, int offset) {
return arr;
}

/**
 * Despite the "read" in its name (it mirrors {@code readLongLEHelper}), this helper
 * writes {@code number} in big-endian order into a freshly allocated 8-byte array
 * using {@link ByteUtils#writeLongBE} and returns that array.
 *
 * @param number the value to encode
 * @param offset the index at which encoding starts; the array holds exactly 8 bytes
 *               and the callers in this test pass 0
 * @return the array containing the encoded bytes
 */
private byte[] readLongBEHelper(long number, int offset) {
byte[] arr = new byte[8];
ByteUtils.writeLongBE(number, arr, offset);
return arr;
}

public void testLongToBytes() {
assertThat(readLongLEHelper(123456L, 0), is(new byte[] { 64, -30, 1, 0, 0, 0, 0, 0 }));
assertThat(readLongLEHelper(-123456L, 0), is(new byte[] { -64, 29, -2, -1, -1, -1, -1, -1 }));
Expand All @@ -71,6 +77,14 @@ public void testLongToBytes() {
assertThat(readLongLEHelper(Long.MAX_VALUE + 127, 0), is(new byte[] { 126, 0, 0, 0, 0, 0, 0, -128 }));
assertThat(readLongLEHelper(Long.MIN_VALUE - 1, 0), is(new byte[] { -1, -1, -1, -1, -1, -1, -1, 127 }));
assertThat(readLongLEHelper(Long.MIN_VALUE - 127, 0), is(new byte[] { -127, -1, -1, -1, -1, -1, -1, 127 }));

assertThat(readLongBEHelper(123456L, 0), is(new byte[] { 0, 0, 0, 0, 0, 1, -30, 64 }));
assertThat(readLongBEHelper(-123456L, 0), is(new byte[] { -1, -1, -1, -1, -1, -2, 29, -64 }));
assertThat(readLongBEHelper(0L, 0), is(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }));
assertThat(readLongBEHelper(Long.MAX_VALUE + 1, 0), is(new byte[] { -128, 0, 0, 0, 0, 0, 0, 0 }));
assertThat(readLongBEHelper(Long.MAX_VALUE + 127, 0), is(new byte[] { -128, 0, 0, 0, 0, 0, 0, 126 }));
assertThat(readLongBEHelper(Long.MIN_VALUE - 1, 0), is(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }));
assertThat(readLongBEHelper(Long.MIN_VALUE - 127, 0), is(new byte[] { 127, -1, -1, -1, -1, -1, -1, -127 }));
}

public void testBytesToLong() {
Expand All @@ -80,10 +94,21 @@ public void testBytesToLong() {
assertThat(ByteUtils.readLongLE(new byte[] { 0, 0, 0, 0, 0, 0, 0, -128 }, 0), is(Long.MIN_VALUE));
assertThat(ByteUtils.readLongLE(new byte[] { 126, 0, 0, 0, 0, 0, 0, -128 }, 0), is(Long.MIN_VALUE + 127 - 1));
assertThat(ByteUtils.readLongLE(new byte[] { -1, -1, -1, -1, -1, -1, -1, 127 }, 0), is(Long.MAX_VALUE));
assertThat(ByteUtils.readLongLE(new byte[] { -127, -1, -1, -1, -1, -1, -1, 127, 0 }, 0), is(Long.MAX_VALUE - 127 + 1));
assertThat(ByteUtils.readLongLE(new byte[] { -127, -1, -1, -1, -1, -1, -1, 127, randomByte() }, 0), is(Long.MAX_VALUE - 127 + 1));

assertThat(ByteUtils.readLongLE(new byte[] { randomByte(), 64, -30, 1, 0, 0, 0, 0, 0 }, 1), is(123456L));
assertThat(ByteUtils.readLongLE(new byte[] { randomByte(), -64, 29, -2, -1, -1, -1, -1, -1 }, 1), is(-123456L));

assertThat(ByteUtils.readLongBE(new byte[] { 0, 0, 0, 0, 0, 1, -30, 64 }, 0), is(123456L));
assertThat(ByteUtils.readLongBE(new byte[] { -1, -1, -1, -1, -1, -2, 29, -64 }, 0), is(-123456L));
assertThat(ByteUtils.readLongBE(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, 0), is(0L));
assertThat(ByteUtils.readLongBE(new byte[] { -128, 0, 0, 0, 0, 0, 0, 0 }, 0), is(Long.MIN_VALUE));
assertThat(ByteUtils.readLongBE(new byte[] { -128, 0, 0, 0, 0, 0, 0, 126 }, 0), is(Long.MIN_VALUE + 127 - 1));
assertThat(ByteUtils.readLongBE(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, 0), is(Long.MAX_VALUE));
assertThat(ByteUtils.readLongBE(new byte[] { 127, -1, -1, -1, -1, -1, -1, -127, randomByte() }, 0), is(Long.MAX_VALUE - 127 + 1));

assertThat(ByteUtils.readLongLE(new byte[] { 100, 64, -30, 1, 0, 0, 0, 0, 0 }, 1), is(123456L));
assertThat(ByteUtils.readLongLE(new byte[] { -100, -64, 29, -2, -1, -1, -1, -1, -1 }, 1), is(-123456L));
assertThat(ByteUtils.readLongBE(new byte[] { randomByte(), 0, 0, 0, 0, 0, 1, -30, 64 }, 1), is(123456L));
assertThat(ByteUtils.readLongBE(new byte[] { randomByte(), -1, -1, -1, -1, -1, -2, 29, -64 }, 1), is(-123456L));
}

private byte[] readIntLEHelper(int number, int offset) {
Expand Down
Loading