Skip to content

Commit 0aebbb5

Browse files
authored
[8.x] Add support for multi-value dimensions (#112645) (#113369)
* Add support for multi-value dimensions (#112645) Closes #110387 Having this in now affords us not having to introduce version checks in the ES exporter later. We can simply use the same serialization logic for metric attributes as we do for other signals. This also enables us to properly map `*.ip` fields to the ip field type as ip fields containing a list of IPs are not converted to a comma-separated list. (cherry picked from commit 8d223cb) # Conflicts: # server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java * Remove skip test for 8.x This was just needed for 8.x to 9.0 compatibility tests
1 parent 6664c56 commit 0aebbb5

File tree

18 files changed

+345
-83
lines changed

18 files changed

+345
-83
lines changed

docs/changelog/112645.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 112645
2+
summary: Add support for multi-value dimensions
3+
area: Mapping
4+
type: enhancement
5+
issues:
6+
- 110387

docs/reference/mapping/types/keyword.asciidoc

-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
163163
Dimension fields have the following constraints:
164164

165165
* The `doc_values` and `index` mapping parameters must be `true`.
166-
* Field values cannot be an <<array,array or multi-value>>.
167166
// end::dimension[]
168167
* Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints:
169168
** The field cannot use a <<normalizer,`normalizer`>>.

modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml

+80
Original file line numberDiff line numberDiff line change
@@ -1230,3 +1230,83 @@ non string dimension fields:
12301230
- match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true }
12311231
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' }
12321232
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true }
1233+
1234+
---
1235+
multi value dimensions:
1236+
- requires:
1237+
cluster_features: ["routing.multi_value_routing_path"]
1238+
reason: support for multi-value dimensions
1239+
1240+
- do:
1241+
allowed_warnings:
1242+
- "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation"
1243+
indices.put_index_template:
1244+
name: my-dynamic-template
1245+
body:
1246+
index_patterns: [k9s*]
1247+
data_stream: {}
1248+
template:
1249+
settings:
1250+
index:
1251+
number_of_shards: 1
1252+
mode: time_series
1253+
time_series:
1254+
start_time: 2023-08-31T13:03:08.138Z
1255+
1256+
mappings:
1257+
properties:
1258+
attributes:
1259+
type: passthrough
1260+
dynamic: true
1261+
time_series_dimension: true
1262+
priority: 1
1263+
dynamic_templates:
1264+
- counter_metric:
1265+
mapping:
1266+
type: integer
1267+
time_series_metric: counter
1268+
1269+
- do:
1270+
bulk:
1271+
index: k9s
1272+
refresh: true
1273+
body:
1274+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1275+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }'
1276+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1277+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }'
1278+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1279+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }'
1280+
- is_false: errors
1281+
1282+
- do:
1283+
search:
1284+
index: k9s
1285+
body:
1286+
size: 0
1287+
aggs:
1288+
tsids:
1289+
terms:
1290+
field: _tsid
1291+
1292+
- length: { aggregations.tsids.buckets: 3 } # only the order of the dim1 attribute is different, yet we expect to have two distinct time series
1293+
1294+
- do:
1295+
search:
1296+
index: k9s
1297+
body:
1298+
size: 0
1299+
aggs:
1300+
dims:
1301+
terms:
1302+
field: dim1
1303+
order:
1304+
_key: asc
1305+
1306+
- length: { aggregations.dims.buckets: 3 }
1307+
- match: { aggregations.dims.buckets.0.key: a }
1308+
- match: { aggregations.dims.buckets.0.doc_count: 2 }
1309+
- match: { aggregations.dims.buckets.1.key: b }
1310+
- match: { aggregations.dims.buckets.1.doc_count: 3 }
1311+
- match: { aggregations.dims.buckets.2.key: c }
1312+
- match: { aggregations.dims.buckets.2.doc_count: 1 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml

+17-14
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,11 @@ missing dimension on routing path field:
119119
type: keyword
120120

121121
---
122-
multi-value routing path field:
122+
multi-value routing path field succeeds:
123123
- requires:
124124
test_runner_features: close_to
125-
cluster_features: ["gte_v8.13.0"]
126-
reason: _tsid hashing introduced in 8.13
125+
cluster_features: ["routing.multi_value_routing_path"]
126+
reason: support for multi-value dimensions
127127

128128
- do:
129129
indices.create:
@@ -172,12 +172,7 @@ multi-value routing path field:
172172
- '{"index": {}}'
173173
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }'
174174

175-
- is_true: errors
176-
177-
- match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
178-
- match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
179-
- match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
180-
- match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
175+
- is_false: errors
181176

182177
- do:
183178
search:
@@ -195,13 +190,21 @@ multi-value routing path field:
195190
avg:
196191
field: voltage
197192

198-
- match: {hits.total.value: 4}
199-
- length: {aggregations.tsids.buckets: 2}
193+
- match: {hits.total.value: 8}
194+
- length: {aggregations.tsids.buckets: 4}
200195

201-
- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
196+
- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" }
202197
- match: {aggregations.tsids.buckets.0.doc_count: 2 }
203198
- close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }}
204199

205-
- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
200+
- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" }
206201
- match: {aggregations.tsids.buckets.1.doc_count: 2 }
207-
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }}
202+
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }}
203+
204+
- match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
205+
- match: {aggregations.tsids.buckets.2.doc_count: 2 }
206+
- close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }}
207+
208+
- match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
209+
- match: {aggregations.tsids.buckets.3.doc_count: 2 }
210+
- close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }}

server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java

+38-20
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import java.util.ArrayList;
3636
import java.util.Base64;
3737
import java.util.Collections;
38-
import java.util.Iterator;
3938
import java.util.List;
4039
import java.util.Map;
4140
import java.util.Set;
@@ -45,13 +44,15 @@
4544
import java.util.function.Predicate;
4645

4746
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
47+
import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken;
4848

4949
/**
5050
* Generates the shard id for {@code (id, routing)} pairs.
5151
*/
5252
public abstract class IndexRouting {
5353

5454
static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path");
55+
static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path");
5556

5657
/**
5758
* Build the routing from {@link IndexMetadata}.
@@ -301,7 +302,13 @@ public String createId(Map<String, Object> flat, byte[] suffix) {
301302
Builder b = builder();
302303
for (Map.Entry<String, Object> e : flat.entrySet()) {
303304
if (isRoutingPath.test(e.getKey())) {
304-
b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString()))));
305+
if (e.getValue() instanceof List<?> listValue) {
306+
for (Object v : listValue) {
307+
b.addHash(e.getKey(), new BytesRef(v.toString()));
308+
}
309+
} else {
310+
b.addHash(e.getKey(), new BytesRef(e.getValue().toString()));
311+
}
305312
}
306313
}
307314
return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty);
@@ -336,7 +343,7 @@ public class Builder {
336343

337344
public void addMatching(String fieldName, BytesRef string) {
338345
if (isRoutingPath.test(fieldName)) {
339-
hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string)));
346+
addHash(fieldName, string);
340347
}
341348
}
342349

@@ -357,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws
357364
}
358365
}
359366

367+
private void extractArray(@Nullable String path, XContentParser source) throws IOException {
368+
while (source.currentToken() != Token.END_ARRAY) {
369+
expectValueToken(source.currentToken(), source);
370+
extractItem(path, source);
371+
}
372+
}
373+
360374
private void extractItem(String path, XContentParser source) throws IOException {
361375
switch (source.currentToken()) {
362376
case START_OBJECT:
@@ -367,7 +381,12 @@ private void extractItem(String path, XContentParser source) throws IOException
367381
case VALUE_STRING:
368382
case VALUE_NUMBER:
369383
case VALUE_BOOLEAN:
370-
hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text()))));
384+
addHash(path, new BytesRef(source.text()));
385+
source.nextToken();
386+
break;
387+
case START_ARRAY:
388+
source.nextToken();
389+
extractArray(path, source);
371390
source.nextToken();
372391
break;
373392
case VALUE_NULL:
@@ -376,28 +395,24 @@ private void extractItem(String path, XContentParser source) throws IOException
376395
default:
377396
throw new ParsingException(
378397
source.getTokenLocation(),
379-
"Routing values must be strings but found [{}]",
398+
"Cannot extract routing path due to unexpected token [{}]",
380399
source.currentToken()
381400
);
382401
}
383402
}
384403

404+
private void addHash(String path, BytesRef value) {
405+
hashes.add(new NameAndHash(new BytesRef(path), hash(value), hashes.size()));
406+
}
407+
385408
private int buildHash(IntSupplier onEmpty) {
386-
Collections.sort(hashes);
387-
Iterator<NameAndHash> itr = hashes.iterator();
388-
if (itr.hasNext() == false) {
409+
if (hashes.isEmpty()) {
389410
return onEmpty.getAsInt();
390411
}
391-
NameAndHash prev = itr.next();
392-
int hash = hash(prev.name) ^ prev.hash;
393-
while (itr.hasNext()) {
394-
NameAndHash next = itr.next();
395-
if (prev.name.equals(next.name)) {
396-
throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]");
397-
}
398-
int thisHash = hash(next.name) ^ next.hash;
399-
hash = 31 * hash + thisHash;
400-
prev = next;
412+
Collections.sort(hashes);
413+
int hash = 0;
414+
for (NameAndHash nah : hashes) {
415+
hash = 31 * hash + (hash(nah.name) ^ nah.hash);
401416
}
402417
return hash;
403418
}
@@ -458,10 +473,13 @@ private String error(String operation) {
458473
}
459474
}
460475

461-
private record NameAndHash(BytesRef name, int hash) implements Comparable<NameAndHash> {
476+
private record NameAndHash(BytesRef name, int hash, int order) implements Comparable<NameAndHash> {
462477
@Override
463478
public int compareTo(NameAndHash o) {
464-
return name.compareTo(o.name);
479+
int i = name.compareTo(o.name);
480+
if (i != 0) return i;
481+
// ensures array values are in the order as they appear in the source
482+
return Integer.compare(order, o.order);
465483
}
466484
}
467485
}

server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification {
1818

1919
@Override
2020
public Set<NodeFeature> getFeatures() {
21-
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH);
21+
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH);
2222
}
2323
}

server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java

+14
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar
7272
}
7373
}
7474

75+
/**
76+
* Makes sure the provided token {@linkplain Token#isValue() is a value type}
77+
*
78+
* @throws ParsingException if the token is not a value type
79+
*/
80+
public static void expectValueToken(Token actual, XContentParser parser) {
81+
if (actual.isValue() == false) {
82+
throw new ParsingException(
83+
parser.getTokenLocation(),
84+
String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual)
85+
);
86+
}
87+
}
88+
7589
private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
7690
return new ParsingException(
7791
parser.getTokenLocation(),

0 commit comments

Comments
 (0)