Skip to content

Commit fdbef48

Browse files
authored
Refactor TSDB doc_values utils to allow introducing a new codec (#115042) (#115159)
This PR refactors the doc_values utils used in the TSDB codec to allow sharing between the current codec and the new codec.
1 parent 682ed39 commit fdbef48

File tree

7 files changed

+42
-48
lines changed

7 files changed

+42
-48
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/AbstractDocValuesForUtilBenchmark.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ public abstract class AbstractDocValuesForUtilBenchmark {
2121
protected final int blockSize;
2222

2323
public AbstractDocValuesForUtilBenchmark() {
24-
this.forUtil = new DocValuesForUtil();
24+
this.forUtil = new DocValuesForUtil(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
2525
this.blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
2626
}
2727

server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java

+3-8
Original file line number | Diff line number | Diff line change
@@ -22,15 +22,10 @@ public class DocValuesForUtil {
2222
private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
2323
private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE;
2424
private final int blockSize;
25-
private final byte[] encoded;
25+
private final byte[] encoded = new byte[1024];
2626

27-
public DocValuesForUtil() {
28-
this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
29-
}
30-
31-
private DocValuesForUtil(int blockSize) {
32-
this.blockSize = blockSize;
33-
this.encoded = new byte[1024];
27+
public DocValuesForUtil(int numericBlockSize) {
28+
this.blockSize = numericBlockSize;
3429
}
3530

3631
public static int roundBits(int bitsPerValue) {

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon
144144
if (maxOrd != 1) {
145145
final long[] buffer = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
146146
int bufferSize = 0;
147-
final ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder();
147+
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
148148
values = valuesProducer.getSortedNumeric(field);
149149
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
150150
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java

+3-3
Original file line number | Diff line number | Diff line change
@@ -965,7 +965,7 @@ public long longValue() {
965965

966966
private final int maxDoc = ES87TSDBDocValuesProducer.this.maxDoc;
967967
private int doc = -1;
968-
private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
968+
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
969969
private long currentBlockIndex = -1;
970970
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
971971

@@ -1030,7 +1030,7 @@ public long longValue() throws IOException {
10301030
);
10311031
return new NumericDocValues() {
10321032

1033-
private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
1033+
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
10341034
private long currentBlockIndex = -1;
10351035
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
10361036

@@ -1092,7 +1092,7 @@ private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IO
10921092
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
10931093
return new NumericValues() {
10941094

1095-
private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
1095+
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
10961096
private long currentBlockIndex = -1;
10971097
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
10981098

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java renamed to server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBDocValuesEncoder.java

+21-19
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,8 @@
4444
* </li>
4545
* </ul>
4646
*
47-
* Notice that encoding and decoding are written in a nested way, for instance {@link ES87TSDBDocValuesEncoder#deltaEncode} calling
48-
* {@link ES87TSDBDocValuesEncoder#removeOffset} and so on. This allows us to easily introduce new encoding schemes or remove existing
47+
* Notice that encoding and decoding are written in a nested way, for instance {@link TSDBDocValuesEncoder#deltaEncode} calling
48+
* {@link TSDBDocValuesEncoder#removeOffset} and so on. This allows us to easily introduce new encoding schemes or remove existing
4949
* (non-effective) encoding schemes in a backward-compatible way.
5050
*
5151
* A token is used as a bitmask to represent which encoding is applied and allows us to detect the applied encoding scheme at decoding time.
@@ -54,11 +54,13 @@
5454
*
5555
* Of course, decoding follows the opposite order with respect to encoding.
5656
*/
57-
public class ES87TSDBDocValuesEncoder {
57+
public class TSDBDocValuesEncoder {
5858
private final DocValuesForUtil forUtil;
59+
private final int numericBlockSize;
5960

60-
public ES87TSDBDocValuesEncoder() {
61-
this.forUtil = new DocValuesForUtil();
61+
public TSDBDocValuesEncoder(int numericBlockSize) {
62+
this.forUtil = new DocValuesForUtil(numericBlockSize);
63+
this.numericBlockSize = numericBlockSize;
6264
}
6365

6466
/**
@@ -68,7 +70,7 @@ public ES87TSDBDocValuesEncoder() {
6870
private void deltaEncode(int token, int tokenBits, long[] in, DataOutput out) throws IOException {
6971
int gts = 0;
7072
int lts = 0;
71-
for (int i = 1; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
73+
for (int i = 1; i < numericBlockSize; ++i) {
7274
if (in[i] > in[i - 1]) {
7375
gts++;
7476
} else if (in[i] < in[i - 1]) {
@@ -79,7 +81,7 @@ private void deltaEncode(int token, int tokenBits, long[] in, DataOutput out) th
7981
final boolean doDeltaCompression = (gts == 0 && lts >= 2) || (lts == 0 && gts >= 2);
8082
long first = 0;
8183
if (doDeltaCompression) {
82-
for (int i = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - 1; i > 0; --i) {
84+
for (int i = numericBlockSize - 1; i > 0; --i) {
8385
in[i] -= in[i - 1];
8486
}
8587
// Avoid setting in[0] to 0 in case there is a minimum interval between
@@ -115,7 +117,7 @@ private void removeOffset(int token, int tokenBits, long[] in, DataOutput out) t
115117
}
116118

117119
if (min != 0) {
118-
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
120+
for (int i = 0; i < numericBlockSize; ++i) {
119121
in[i] -= min;
120122
}
121123
token = (token << 1) | 0x01;
@@ -143,7 +145,7 @@ private void gcdEncode(int token, int tokenBits, long[] in, DataOutput out) thro
143145
}
144146
final boolean doGcdCompression = Long.compareUnsigned(gcd, 1) > 0;
145147
if (doGcdCompression) {
146-
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
148+
for (int i = 0; i < numericBlockSize; ++i) {
147149
in[i] /= gcd;
148150
}
149151
token = (token << 1) | 0x01;
@@ -174,7 +176,7 @@ private void forEncode(int token, int tokenBits, long[] in, DataOutput out) thro
174176
* Encode the given longs using a combination of delta-coding, GCD factorization and bit packing.
175177
*/
176178
void encode(long[] in, DataOutput out) throws IOException {
177-
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
179+
assert in.length == numericBlockSize;
178180

179181
deltaEncode(0, 0, in, out);
180182
}
@@ -192,7 +194,7 @@ void encode(long[] in, DataOutput out) throws IOException {
192194
* </ul>
193195
*/
194196
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
195-
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
197+
assert in.length == numericBlockSize;
196198
int numRuns = 1;
197199
long firstValue = in[0];
198200
long previousValue = firstValue;
@@ -259,7 +261,7 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
259261
}
260262

261263
void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
262-
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
264+
assert out.length == numericBlockSize : out.length;
263265

264266
long v1 = in.readVLong();
265267
int encoding = Long.numberOfTrailingZeros(~v1);
@@ -293,7 +295,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
293295

294296
/** Decode longs that have been encoded with {@link #encode}. */
295297
void decode(DataInput in, long[] out) throws IOException {
296-
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
298+
assert out.length == numericBlockSize : out.length;
297299

298300
final int token = in.readVInt();
299301
final int bitsPerValue = token >>> 3;
@@ -330,21 +332,21 @@ void decode(DataInput in, long[] out) throws IOException {
330332
}
331333

332334
// this loop should auto-vectorize
333-
private static void mul(long[] arr, long m) {
334-
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
335+
private void mul(long[] arr, long m) {
336+
for (int i = 0; i < numericBlockSize; ++i) {
335337
arr[i] *= m;
336338
}
337339
}
338340

339341
// this loop should auto-vectorize
340-
private static void add(long[] arr, long min) {
341-
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
342+
private void add(long[] arr, long min) {
343+
for (int i = 0; i < numericBlockSize; ++i) {
342344
arr[i] += min;
343345
}
344346
}
345347

346-
private static void deltaDecode(long[] arr) {
347-
for (int i = 1; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
348+
private void deltaDecode(long[] arr) {
349+
for (int i = 1; i < numericBlockSize; ++i) {
348350
arr[i] += arr[i - 1];
349351
}
350352
}

server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java

+11-14
Original file line number | Diff line number | Diff line change
@@ -31,17 +31,18 @@
3131
import java.util.Random;
3232

3333
public class DocValuesForUtilTests extends LuceneTestCase {
34+
int NUMERIC_BLOCK_SIZE = 1 << 7;
3435

3536
public void testEncodeDecode() throws IOException {
3637
final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
37-
final long[] values = new long[iterations * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
38+
final long[] values = new long[iterations * NUMERIC_BLOCK_SIZE];
3839
final int[] bpvs = new int[iterations];
3940

4041
for (int i = 0; i < iterations; ++i) {
4142
final int bpv = TestUtil.nextInt(random(), 1, 64);
4243
bpvs[i] = DocValuesForUtil.roundBits(bpv);
43-
for (int j = 0; j < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++j) {
44-
values[i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE + j] = bpv == 64
44+
for (int j = 0; j < NUMERIC_BLOCK_SIZE; ++j) {
45+
values[i * NUMERIC_BLOCK_SIZE + j] = bpv == 64
4546
? random().nextLong()
4647
: TestUtil.nextLong(random(), 0, PackedInts.maxValue(bpv));
4748
}
@@ -53,12 +54,12 @@ public void testEncodeDecode() throws IOException {
5354
{
5455
// encode
5556
IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
56-
final DocValuesForUtil forUtil = new DocValuesForUtil();
57+
final DocValuesForUtil forUtil = new DocValuesForUtil(NUMERIC_BLOCK_SIZE);
5758

5859
for (int i = 0; i < iterations; ++i) {
59-
long[] source = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
60-
for (int j = 0; j < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++j) {
61-
source[j] = values[i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE + j];
60+
long[] source = new long[NUMERIC_BLOCK_SIZE];
61+
for (int j = 0; j < NUMERIC_BLOCK_SIZE; ++j) {
62+
source[j] = values[i * NUMERIC_BLOCK_SIZE + j];
6263
}
6364
out.writeByte((byte) bpvs[i]);
6465
forUtil.encode(source, bpvs[i], out);
@@ -70,18 +71,14 @@ public void testEncodeDecode() throws IOException {
7071
{
7172
// decode
7273
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
73-
final DocValuesForUtil forUtil = new DocValuesForUtil();
74-
final long[] restored = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
74+
final DocValuesForUtil forUtil = new DocValuesForUtil(NUMERIC_BLOCK_SIZE);
75+
final long[] restored = new long[NUMERIC_BLOCK_SIZE];
7576
for (int i = 0; i < iterations; ++i) {
7677
final int bitsPerValue = in.readByte();
7778
forUtil.decode(bitsPerValue, in, restored);
7879
assertArrayEquals(
7980
Arrays.toString(restored),
80-
ArrayUtil.copyOfSubArray(
81-
values,
82-
i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE,
83-
(i + 1) * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE
84-
),
81+
ArrayUtil.copyOfSubArray(values, i * NUMERIC_BLOCK_SIZE, (i + 1) * NUMERIC_BLOCK_SIZE),
8582
restored
8683
);
8784
}

server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java

+2-2
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,11 @@
2323

2424
public class ES87TSDBDocValuesEncoderTests extends LuceneTestCase {
2525

26-
private final ES87TSDBDocValuesEncoder encoder;
26+
private final TSDBDocValuesEncoder encoder;
2727
private final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
2828

2929
public ES87TSDBDocValuesEncoderTests() {
30-
this.encoder = new ES87TSDBDocValuesEncoder();
30+
this.encoder = new TSDBDocValuesEncoder(blockSize);
3131
}
3232

3333
public void testRandomValues() throws IOException {

0 commit comments

Comments
 (0)