// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef CARBON_COMMON_RAW_HASHTABLE_H_
#define CARBON_COMMON_RAW_HASHTABLE_H_
#include <algorithm>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <new>
#include <type_traits>
#include <utility>
#include "common/check.h"
#include "common/hashing.h"
#include "common/raw_hashtable_metadata_group.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
// A namespace collecting a set of low-level utilities for building hashtable
// data structures. These should only be used as implementation details of
// higher-level data-structure APIs.
//
// The utilities here use the `hashtable_key_context.h` provided `KeyContext` to
// support the necessary hashtable operations on keys: hashing and comparison.
// This also serves as the customization point for hashtables built on this
// infrastructure for those operations. See that header file for details.
//
// These utilities support hashtables following a *specific* API design pattern,
// and using Small-Size Optimization, or "SSO", when desired. We expect there to
// be three layers to any hashtable design:
//
// - A *view* type: a read-only view of the hashtable contents. This type should
// be a value type and is expected to be passed by-value in APIs. However, it
//   will have `const`-reference semantics, much like a `std::string_view`. Note
//   that the *entries* will continue to be mutable; it is only the *table*
//   itself that is read-only.
//
// - A *base* type: a base class type of the actual hashtable, which allows
// almost all mutable operations but erases any specific SSO buffer size.
// Because this is a base of the actual hash table, it is designed to be
// passed as a non-`const` reference or pointer.
//
// - A *table* type: the actual hashtable which derives from the base type and
// adds any desired SSO storage buffer. Beyond the physical storage, it also
// allows resetting the table to its initial state & allocated size, as well
// as copying and moving the table.
//
// For complete examples of the API design, see `set.h` for a hashtable-based
// set data structure, and `map.h` for a hashtable-based map data structure.
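//
// As a rough sketch of these three layers (the `My*` names here are
// hypothetical and only for illustration; see `set.h` and `map.h` for the real
// implementations), a map-like container might be wired up as:
//
//   class MyMapView : RawHashtable::ViewImpl<K, V, MyKeyContext> {
//     // Read-only API, forwarding to the `*Impl` methods of `ViewImpl`.
//   };
//   class MyMapBase : protected RawHashtable::BaseImpl<K, V, MyKeyContext> {
//     // Mutating API with the SSO buffer size erased.
//   };
//   template <ssize_t SmallSize>
//   class MyMap : RawHashtable::TableImpl<MyMapBase, SmallSize> {
//     // Adds the SSO buffer and reset/copy/move support.
//   };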
//
// The hashtable design implemented here has several key invariants and design
// elements that are essential to all three of the types above and the
// functionality they provide.
//
// - The underlying hashtable uses [open addressing], a power-of-two table size,
// and quadratic probing rather than closed addressing and chaining.
//
// [open addressing]: https://en.wikipedia.org/wiki/Open_addressing
//
// - Each _slot_ in the table corresponds to a key, a value, and one byte of
// metadata. Each _entry_ is a key and value. The key and value for an entry
// are stored together.
//
// - The allocated storage is organized into an array of metadata bytes followed
//   by an array of entry storage (see the layout sketch at the end of this
//   comment block).
//
// - The metadata byte corresponding to each entry marks whether that entry is
//   empty, deleted, or present. When present, the remaining 7 bits store a tag
//   taken from the hash of the entry's key.
//
// - The storage for an entry is an internal type that should not be exposed to
//   users; only the underlying keys and values should be.
//
// - The hash addressing and probing occurs over *groups* of slots rather than
// individual entries. When inserting a new entry, it can be added to the
//   group it hashes to as long as it is not full, and can even replace a slot
//   with a tombstone indicating a previously deleted entry. Only when the group
//   is full will it look at the next group in the probe sequence. As a result,
// there may be entries in a group where a different group is the start of
// that entry's probe sequence. Also, when performing a lookup, every group in
// the probe sequence must be inspected for the lookup key until it is found
// or the group has an empty slot.
//
// - Groups are scanned rapidly using the one-byte metadata for each entry in
// the group and CPU instructions that allow comparing all of the metadata for
// a group in parallel. For more details on the metadata group encoding and
// scanning, see `raw_hashtable_metadata_group.h`.
//
// - `GroupSize` is a platform-specific relatively small power of two that fits
// in some hardware register. However, `MaxGroupSize` is provided as a
// portable max that is also a power of two. The table storage, whether
// provided by an SSO buffer or allocated, is required to be a multiple of
// `MaxGroupSize` to keep the requirement portable but sufficient for all
// platforms.
//
// - There is *always* an allocated table of some multiple of `MaxGroupSize`.
//   This allows accesses to be branchless. When heap allocated, we proactively
//   allocate a table of at least the minimum size. When there is a small-size
// optimization (SSO) buffer, that provides the initial allocation.
//
// - The table performs a minimal amount of bookkeeping that limits the APIs it
// can support:
// - `alloc_size` is the size of the table *allocated* (not *used*), and is
// always a power of 2 at least as big as `MinAllocatedSize`.
// - `storage` is a pointer to the storage for the `alloc_size` slots of the
// table, and never null.
// - `small_alloc_size` is the maximum `alloc_size` where the table is stored
// in the object itself instead of separately on the heap. In this case,
// `storage` points to `small_storage_`.
// - `growth_budget` is the number of entries that may be added before the
// table allocation is doubled. It is always
// `GrowthThresholdForAllocSize(alloc_size)` minus the number of
// non-empty (filled or deleted) slots. If it ever falls to 0, the table
// is grown to keep it greater than 0.
//   There is also a "moved-from" state, in which `alloc_size` is 0 and
//   `storage` is null, where the table may only be reinitialized or destroyed.
//   Since the table doesn't track the exact number of filled entries, it
//   doesn't support a container-style `size` API.
//
// - There is no direct iterator support because of the complexity of embedding
// the group-based metadata scanning into an iterator model. Instead, there is
// just a for-each method that is passed a lambda to observe all entries. The
// order of this observation is also not guaranteed.
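//
// As a concrete illustration of the storage layout (the numbers here are
// purely an example): a table with `alloc_size == 128` is laid out as
//
//   [ 128 metadata bytes ][ 128 x StorageEntry<KeyT, ValueT> ]
//
// with both arrays indexed by the same slot index: slot `i`'s metadata byte is
// at offset `i`, and its entry follows the metadata array at index `i`.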
namespace Carbon::RawHashtable {
// Which prefetch strategies are enabled can be controlled via macros to support
// experimentation.
//
// Currently, benchmarking on both modern AMD and ARM CPUs seems to indicate
// that prefetching the entry group is more beneficial than prefetching the
// metadata, but that the benefit degrades when both are enabled. This
// determined our current default: metadata prefetch disabled, entry group
// prefetch enabled.
//
// Override these by defining them as part of the build explicitly to either `0`
// or `1`. If left undefined, the defaults will be supplied.
#ifndef CARBON_ENABLE_PREFETCH_METADATA
#define CARBON_ENABLE_PREFETCH_METADATA 0
#endif
#ifndef CARBON_ENABLE_PREFETCH_ENTRY_GROUP
#define CARBON_ENABLE_PREFETCH_ENTRY_GROUP 1
#endif
// If allocating storage, allocate a minimum of one cacheline of group metadata
// or a minimum of one group, whichever is larger.
constexpr ssize_t MinAllocatedSize = std::max<ssize_t>(64, MaxGroupSize);
// An entry in the hashtable storage of a `KeyT` and `ValueT` object.
//
// Allows manual construction, destruction, and access to these values so we can
// create arrays of the entries prior to populating them with actual keys and
// values.
template <typename KeyT, typename ValueT>
struct StorageEntry {
static constexpr bool IsTriviallyDestructible =
std::is_trivially_destructible_v<KeyT> &&
std::is_trivially_destructible_v<ValueT>;
static constexpr bool IsTriviallyRelocatable =
IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT> &&
std::is_trivially_move_constructible_v<ValueT>;
static constexpr bool IsCopyable =
IsTriviallyRelocatable || (std::is_copy_constructible_v<KeyT> &&
std::is_copy_constructible_v<ValueT>);
auto key() const -> const KeyT& {
// Ensure we don't need more alignment than available. This check lives inside
// a method body so that it applies to the complete type.
static_assert(
alignof(StorageEntry) <= MinAllocatedSize,
"The minimum allocated size turns into the alignment of our array of "
"storage entries as they follow the metadata byte array.");
return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
}
auto key() -> KeyT& {
return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
}
auto value() const -> const ValueT& {
return *std::launder(reinterpret_cast<const ValueT*>(&value_storage));
}
auto value() -> ValueT& {
return const_cast<ValueT&>(const_cast<const StorageEntry*>(this)->value());
}
// We handle destruction and move manually as we only want to expose distinct
// `KeyT` and `ValueT` subobjects to user code that may need to do in-place
// construction. As a consequence, this struct only provides the storage and
// we have to manually manage the construction, move, and destruction of the
// objects.
auto Destroy() -> void {
static_assert(!IsTriviallyDestructible,
"Should never instantiate when trivial!");
key().~KeyT();
value().~ValueT();
}
// Copy from another entry, leaving that entry intact.
auto CopyFrom(const StorageEntry& entry) -> void
requires(IsCopyable)
{
if constexpr (IsTriviallyRelocatable) {
memcpy(this, &entry, sizeof(StorageEntry));
} else {
new (&key_storage) KeyT(entry.key());
new (&value_storage) ValueT(entry.value());
}
}
// Move from an expiring entry and destroy that entry's key and value.
// Optimizes to directly use `memcpy` when correct.
auto MoveFrom(StorageEntry&& entry) -> void {
if constexpr (IsTriviallyRelocatable) {
memcpy(this, &entry, sizeof(StorageEntry));
} else {
new (&key_storage) KeyT(std::move(entry.key()));
entry.key().~KeyT();
new (&value_storage) ValueT(std::move(entry.value()));
entry.value().~ValueT();
}
}
alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
alignas(ValueT) std::byte value_storage[sizeof(ValueT)];
};
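// A minimal usage sketch (purely illustrative; the real call sites are in the
// table implementation below and in `map.h`/`set.h`): the table
// placement-constructs the key and value directly into the raw storage, and
// for non-trivial types tears them down again via `Destroy`:
//
//   StorageEntry<std::string, int> entry;
//   new (&entry.key_storage) std::string("example key");
//   new (&entry.value_storage) int(42);
//   // ... use entry.key() and entry.value() ...
//   entry.Destroy();  // Only valid to call when !IsTriviallyDestructible.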
// A specialization of the storage entry for sets without a distinct value type.
// Somewhat duplicative with the key-value version, but C++ specialization makes
// doing better difficult.
template <typename KeyT>
struct StorageEntry<KeyT, void> {
static constexpr bool IsTriviallyDestructible =
std::is_trivially_destructible_v<KeyT>;
static constexpr bool IsTriviallyRelocatable =
IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT>;
static constexpr bool IsCopyable =
IsTriviallyRelocatable || std::is_copy_constructible_v<KeyT>;
auto key() const -> const KeyT& {
// Ensure we don't need more alignment than available.
static_assert(
alignof(StorageEntry) <= MinAllocatedSize,
"The minimum allocated size turns into the alignment of our array of "
"storage entries as they follow the metadata byte array.");
return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
}
auto key() -> KeyT& {
return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
}
auto Destroy() -> void {
static_assert(!IsTriviallyDestructible,
"Should never instantiate when trivial!");
key().~KeyT();
}
auto CopyFrom(const StorageEntry& entry) -> void
requires(IsCopyable)
{
if constexpr (IsTriviallyRelocatable) {
memcpy(this, &entry, sizeof(StorageEntry));
} else {
new (&key_storage) KeyT(entry.key());
}
}
auto MoveFrom(StorageEntry&& entry) -> void {
if constexpr (IsTriviallyRelocatable) {
memcpy(this, &entry, sizeof(StorageEntry));
} else {
new (&key_storage) KeyT(std::move(entry.key()));
entry.key().~KeyT();
}
}
alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
};
struct Metrics {
// How many keys are present in the table.
ssize_t key_count = 0;
// How many slots of the table are reserved due to deleted markers required to
// preserve probe sequences.
ssize_t deleted_count = 0;
// How many bytes of allocated storage are used by the table. Note, does not
// include the table object or any small-size buffer.
ssize_t storage_bytes = 0;
// How many keys have required probing beyond the initial group. These are the
// keys with a probe distance > 0.
ssize_t probed_key_count = 0;
// The probe distance averaged over every key. If every key is in its initial
// group, this will be zero as no keys will have a larger probe distance. In
// general, we want this to be as close to zero as possible.
double probe_avg_distance = 0.0;
// The maximum probe distance found for a single key in the table.
ssize_t probe_max_distance = 0;
// The average number of probing comparisons required to locate a specific key
// in the table. This is how many comparisons are required *before* the key is
// located, or the *failed* comparisons. We always have to do one successful
// comparison at the end. This successful comparison isn't counted because
// that focuses this metric on the overhead the table is introducing, and
// keeps a "perfect" table at an average of `0.0` here, mirroring the perfect
// `0.0` average probe distance.
double probe_avg_compares = 0.0;
// The maximum number of probing comparisons required to locate a specific
// key in the table.
ssize_t probe_max_compares = 0;
};
// A placeholder empty type used to model pointers to the allocated buffer of
// storage.
//
// The allocated storage doesn't have a meaningful static layout -- it consists
// of an array of metadata groups followed by an array of storage entries.
// However, we want to be able to mark pointers to this buffer, and so we use
// pointers to this placeholder type as that signifier.
//
// This is a complete, empty type so that it can be used as a base class of a
// specific concrete storage type for compile-time sized storage.
struct Storage {};
// Forward declaration to support friending, see the definition below.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class BaseImpl;
// Implementation helper for defining a read-only view type for a hashtable.
//
// A specific user-facing hashtable view type should derive privately from this
// type, and forward the implementation of its interface to functions in this
// type.
//
// The methods available to user-facing hashtable types are `protected`, and
// where they are expected to directly map to a public API, named with an
// `Impl` suffix. This naming ensures types don't `using` in these low-level
// APIs
// but declare their own and implement them by forwarding to these APIs. We
// don't want users to have to read these implementation details to understand
// their container's API, so none of these methods should be `using`-ed into the
// user facing types.
//
// Some of the types are just convenience aliases and aren't important to
// surface as part of the user-facing type API for readers and so those are
// reasonable to add via a `using`.
//
// Some methods are used by other parts of the raw hashtable implementation.
// Those are kept `private` and where necessary the other components of the raw
// hashtable implementation are friended to give access to them.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class ViewImpl {
protected:
using KeyT = InputKeyT;
using ValueT = InputValueT;
using KeyContextT = InputKeyContextT;
using EntryT = StorageEntry<KeyT, ValueT>;
using MetricsT = Metrics;
friend class BaseImpl<KeyT, ValueT, KeyContextT>;
template <typename InputBaseT, ssize_t SmallSize>
friend class TableImpl;
// Make more-`const` types friends to enable conversions that add `const`.
friend class ViewImpl<const KeyT, ValueT, KeyContextT>;
friend class ViewImpl<KeyT, const ValueT, KeyContextT>;
friend class ViewImpl<const KeyT, const ValueT, KeyContextT>;
ViewImpl() = default;
// Support adding `const` to either key or value type of some other view.
template <typename OtherKeyT, typename OtherValueT>
// NOLINTNEXTLINE(google-explicit-constructor)
ViewImpl(ViewImpl<OtherKeyT, OtherValueT, KeyContextT> other_view)
requires(std::same_as<KeyT, OtherKeyT> ||
std::same_as<KeyT, const OtherKeyT>) &&
(std::same_as<ValueT, OtherValueT> ||
std::same_as<ValueT, const OtherValueT>)
: alloc_size_(other_view.alloc_size_), storage_(other_view.storage_) {}
// Looks up an entry in the hashtable and returns its address or null if not
// present.
template <typename LookupKeyT>
auto LookupEntry(LookupKeyT lookup_key, KeyContextT key_context) const
-> EntryT*;
// Calls `entry_callback` for each entry in the hashtable. All the entries
// within a specific group are visited first, and then `group_callback` is
// called on the group itself. The `group_callback` is typically only used by
// the internals of the hashtable.
template <typename EntryCallbackT, typename GroupCallbackT>
auto ForEachEntry(EntryCallbackT entry_callback,
GroupCallbackT group_callback) const -> void;
// Returns a collection of informative metrics on the current state of the
// table, useful for performance analysis. These include relatively
// slow-to-compute metrics that require deep inspection of the table's state.
auto ComputeMetricsImpl(KeyContextT key_context) const -> MetricsT;
private:
ViewImpl(ssize_t alloc_size, Storage* storage)
: alloc_size_(alloc_size), storage_(storage) {}
// Computes the offset from the metadata array to the entries array for a
// given size. This is trivial, but we use this routine to enforce invariants
// on the sizes.
static constexpr auto EntriesOffset(ssize_t alloc_size) -> ssize_t {
CARBON_DCHECK(llvm::isPowerOf2_64(alloc_size),
"Size must be a power of two for a hashed buffer!");
// The size is always a power of two, and we prevent any too-small sizes, so
// the size itself provides the needed alignment. As a result, the
// offset is exactly the size. We validate this here to catch alignment bugs
// early.
CARBON_DCHECK(static_cast<uint64_t>(alloc_size) ==
llvm::alignTo<alignof(EntryT)>(alloc_size));
return alloc_size;
}
// Compute the allocated table's byte size.
static constexpr auto AllocByteSize(ssize_t alloc_size) -> ssize_t {
return EntriesOffset(alloc_size) + sizeof(EntryT) * alloc_size;
}
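// For example (an illustration only; the entry size here is hypothetical):
// with `alloc_size == 128` and `sizeof(EntryT) == 16`, the metadata occupies
// the first 128 bytes, the entries begin at `EntriesOffset(128) == 128`, and
// `AllocByteSize(128) == 128 + 128 * 16 == 2176` bytes of storage in total.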
auto metadata() const -> uint8_t* {
return reinterpret_cast<uint8_t*>(storage_);
}
auto entries() const -> EntryT* {
return reinterpret_cast<EntryT*>(reinterpret_cast<std::byte*>(storage_) +
EntriesOffset(alloc_size_));
}
// Prefetch the metadata prior to probing. This overlaps as much of the memory
// access latency as we can with hashing the key or another latency-bound
// operation that precedes probing.
auto PrefetchMetadata() const -> void {
if constexpr (CARBON_ENABLE_PREFETCH_METADATA) {
// Prefetch with a "low" temporal locality as we're primarily expecting a
// brief use of the metadata and then to return to application code.
__builtin_prefetch(metadata(), /*read*/ 0, /*low-locality*/ 1);
}
}
// Prefetch an entry. This prefetches for read as it is primarily expected to
// be used in the probing path, and writing afterwards isn't especially slowed
// down. We don't want to synthesize writes unless we *know* we're going to
// write.
static auto PrefetchEntryGroup(const EntryT* entry_group) -> void {
if constexpr (CARBON_ENABLE_PREFETCH_ENTRY_GROUP) {
// Prefetch with a "low" temporal locality as we're primarily expecting a
// brief use of the entries and then to return to application code.
__builtin_prefetch(entry_group, /*read*/ 0, /*low-locality*/ 1);
}
}
ssize_t alloc_size_;
Storage* storage_;
};
// Implementation helper for defining a read-write base type for a hashtable
// that type-erases any SSO buffer.
//
// A specific user-facing hashtable base type should derive using *`protected`*
// inheritance from this type, and forward the implementation of its interface
// to functions in this type.
//
// Other than the use of `protected` inheritance, the patterns for this type,
// and how to build user-facing hashtable base types from it, mirror those of
// `ViewImpl`. See its documentation for more details.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class BaseImpl {
protected:
using KeyT = InputKeyT;
using ValueT = InputValueT;
using KeyContextT = InputKeyContextT;
using ViewImplT = ViewImpl<KeyT, ValueT, KeyContextT>;
using EntryT = typename ViewImplT::EntryT;
using MetricsT = typename ViewImplT::MetricsT;
BaseImpl(int small_alloc_size, Storage* small_storage)
: small_alloc_size_(small_alloc_size) {
CARBON_CHECK(small_alloc_size >= 0);
Construct(small_storage);
}
// Only used for copying and moving, and leaves storage uninitialized.
BaseImpl(ssize_t alloc_size, int growth_budget, int small_alloc_size)
: view_impl_(alloc_size, nullptr),
growth_budget_(growth_budget),
small_alloc_size_(small_alloc_size) {}
// Destruction must be handled by the table where it can destroy entries in
// any small buffer, so make the base destructor protected but defaulted here.
~BaseImpl() = default;
// NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
operator ViewImplT() const { return view_impl(); }
auto view_impl() const -> ViewImplT { return view_impl_; }
// Looks up the provided key in the hashtable. If found, returns a pointer to
// that entry and `false`.
//
// If not found, will locate an empty entry for inserting into, set the
// metadata for that entry, and return a pointer to the entry and `true`. When
// necessary, this will grow the hashtable to cause there to be sufficient
// empty entries.
template <typename LookupKeyT>
auto InsertImpl(LookupKeyT lookup_key, KeyContextT key_context)
-> std::pair<EntryT*, bool>;
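// As a sketch of how a derived base type might use this (hypothetical wrapper
// code; the real forwarding lives in `map.h` and `set.h`): on a fresh
// insertion the returned entry's key and value storage are uninitialized, so
// the caller constructs them in place:
//
//   auto [entry, inserted] = this->InsertImpl(lookup_key, key_context);
//   if (inserted) {
//     new (&entry->key_storage) KeyT(lookup_key);
//     new (&entry->value_storage) ValueT(/*...*/);
//   }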
// Grow the table to specific allocation size.
//
// This will grow the table if necessary for it to have an allocation size
// of `target_alloc_size`, which must be a power of two. Note that this will
// not allow that many keys to be inserted into the hashtable, but a smaller
// number based on the load factor. If a specific number of insertions need to
// be achieved without triggering growth, use the `GrowForInsertCountImpl`
// method.
auto GrowToAllocSizeImpl(ssize_t target_alloc_size, KeyContextT key_context)
-> void;
// Grow the table to allow inserting the specified number of keys.
auto GrowForInsertCountImpl(ssize_t count, KeyContextT key_context) -> void;
// Looks up the entry in the hashtable, and if found destroys the entry and
// returns `true`. If not found, returns `false`.
//
// Does not release any memory, just leaves a tombstone behind so this entry
// cannot be found and the slot can in theory be reused.
template <typename LookupKeyT>
auto EraseImpl(LookupKeyT lookup_key, KeyContextT key_context) -> bool;
// Erases all entries in the hashtable but leaves the allocated storage.
auto ClearImpl() -> void;
private:
template <typename InputBaseT, ssize_t SmallSize>
friend class TableImpl;
static constexpr ssize_t Alignment = std::max<ssize_t>(
alignof(MetadataGroup), alignof(StorageEntry<KeyT, ValueT>));
// Implementation of inline small storage for the provided key type, value
// type, and small size. Specialized for a zero small size to be an empty
// struct.
template <ssize_t SmallSize>
struct SmallStorage : Storage {
alignas(Alignment) uint8_t metadata[SmallSize];
mutable StorageEntry<KeyT, ValueT> entries[SmallSize];
};
// Specialized storage with no inline buffer to avoid any extra alignment.
template <>
struct SmallStorage<0> {};
static auto Allocate(ssize_t alloc_size) -> Storage*;
static auto Deallocate(Storage* storage, ssize_t alloc_size) -> void;
auto growth_budget() const -> ssize_t { return growth_budget_; }
auto alloc_size() const -> ssize_t { return view_impl_.alloc_size_; }
auto alloc_size() -> ssize_t& { return view_impl_.alloc_size_; }
auto storage() const -> Storage* { return view_impl_.storage_; }
auto storage() -> Storage*& { return view_impl_.storage_; }
auto metadata() const -> uint8_t* { return view_impl_.metadata(); }
auto entries() const -> EntryT* { return view_impl_.entries(); }
auto small_alloc_size() const -> ssize_t {
return static_cast<unsigned>(small_alloc_size_);
}
auto is_small() const -> bool {
CARBON_DCHECK(alloc_size() >= small_alloc_size());
return alloc_size() == small_alloc_size();
}
// Wrapper to call `ViewImplT::PrefetchMetadata`; see that method for details.
auto PrefetchStorage() const -> void { view_impl_.PrefetchMetadata(); }
auto Construct(Storage* small_storage) -> void;
auto Destroy() -> void;
auto CopySlotsFrom(const BaseImpl& arg) -> void
requires(EntryT::IsCopyable);
auto MoveFrom(BaseImpl&& arg, Storage* small_storage) -> void;
auto InsertIntoEmpty(HashCode hash) -> EntryT*;
static auto ComputeNextAllocSize(ssize_t old_alloc_size) -> ssize_t;
static auto GrowthThresholdForAllocSize(ssize_t alloc_size) -> ssize_t;
auto GrowToNextAllocSize(KeyContextT key_context) -> void;
auto GrowAndInsert(HashCode hash, KeyContextT key_context) -> EntryT*;
ViewImplT view_impl_;
int growth_budget_;
int small_alloc_size_;
};
// Implementation helper for defining a hashtable type with an SSO buffer.
//
// A specific user-facing hashtable should derive privately from this
// type, and forward the implementation of its interface to functions in this
// type. It should provide the corresponding user-facing hashtable base type as
// the `InputBaseT` type parameter (rather than a key/value pair), and this type
// will in turn derive from that provided base type. This allows derived-to-base
// conversion from the user-facing hashtable type to the user-facing hashtable
// base type. And it does so keeping the inheritance linear. The resulting
// linear inheritance hierarchy for a `Map<K, T>` type will look like:
//
// Map<K, T>
// ↓
// TableImpl<MapBase<K, T>>
// ↓
// MapBase<K, T>
// ↓
// BaseImpl<K, T>
//
// Other than this inheritance technique, the patterns for this type, and how to
// build user-facing hashtable types from it, mirror those of `ViewImpl`. See
// its documentation for more details.
template <typename InputBaseT, ssize_t SmallSize>
class TableImpl : public InputBaseT {
protected:
using BaseT = InputBaseT;
TableImpl() : BaseT(SmallSize, small_storage()) {}
TableImpl(const TableImpl& arg)
requires(BaseT::EntryT::IsCopyable);
TableImpl(TableImpl&& arg) noexcept;
auto operator=(const TableImpl& arg) -> TableImpl&
requires(BaseT::EntryT::IsCopyable);
auto operator=(TableImpl&& arg) noexcept -> TableImpl&;
~TableImpl();
// Resets the hashtable to its initial state, clearing all entries and
// releasing all memory. If the hashtable had an SSO buffer, that is restored
// as the storage. Otherwise, a minimum sized table storage is allocated.
auto ResetImpl() -> void;
private:
using KeyT = BaseT::KeyT;
using ValueT = BaseT::ValueT;
using EntryT = BaseT::EntryT;
using SmallStorage = BaseT::template SmallStorage<SmallSize>;
auto small_storage() const -> Storage*;
auto SetUpStorage() -> void;
[[no_unique_address]] mutable SmallStorage small_storage_;
};
////////////////////////////////////////////////////////////////////////////////
//
// Only implementation details below this point.
//
////////////////////////////////////////////////////////////////////////////////
// Computes a seed that provides a small amount of entropy from ASLR where
// available with minimal cost. The priority is speed, and this computes the
// entropy in a way that doesn't require loading from memory, merely accessing
// entropy that is already available.
inline auto ComputeSeed() -> uint64_t {
// A global variable whose address is used as a seed. This allows ASLR to
// introduce some variation in hashtable ordering when enabled via the code
// model for globals.
extern volatile std::byte global_addr_seed;
return reinterpret_cast<uint64_t>(&global_addr_seed);
}
inline auto ComputeProbeMaskFromSize(ssize_t size) -> size_t {
CARBON_DCHECK(llvm::isPowerOf2_64(size),
"Size must be a power of two for a hashed buffer!");
// Since `size` is a power of two, we can make sure the probes are less
// than `size` by making the mask `size - 1`. We also mask off the low
// bits so the probes are a multiple of the size of the groups of entries.
return (size - 1) & ~GroupMask;
}
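// For example (illustrative, and assuming `GroupMask == GroupSize - 1`): with
// `size == 64` and `GroupSize == 16`, the mask is `(64 - 1) & ~15 == 48`, so
// masked probe indices are confined to {0, 16, 32, 48} -- always group-aligned
// and always in range.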
// This class handles building a sequence of probe indices from a given
// starting point, including both the quadratic growth and masking the index
// to stay within the bucket array size. The starting point doesn't need to be
// clamped to the size ahead of time (or even be positive); we do that
// internally.
//
// For reference on quadratic probing:
// https://en.wikipedia.org/wiki/Quadratic_probing
//
// We compute the quadratic probe index incrementally, but we can also compute
// it mathematically and will check that the incremental result matches our
// mathematical expectation. We use the quadratic probing formula of:
//
// p(start, step) = (start + (step + step^2) / 2) (mod size / GroupSize)
//
// However, we compute it incrementally and scale all the variables by the group
// size so it can be used as an index without an additional multiplication.
class ProbeSequence {
public:
ProbeSequence(ssize_t start, ssize_t size) {
mask_ = ComputeProbeMaskFromSize(size);
p_ = start & mask_;
#ifndef NDEBUG
start_ = start & mask_;
size_ = size;
#endif
}
auto Next() -> void {
step_ += GroupSize;
p_ = (p_ + step_) & mask_;
#ifndef NDEBUG
// Verify against the quadratic formula we expect to be following by scaling
// everything down by `GroupSize`.
CARBON_DCHECK(
(p_ / GroupSize) ==
((start_ / GroupSize +
(step_ / GroupSize + (step_ / GroupSize) * (step_ / GroupSize)) /
2) %
(size_ / GroupSize)),
"Index in probe sequence does not match the expected formula.");
CARBON_DCHECK(step_ < size_,
"We necessarily visit all groups, so we can't have more "
"probe steps than groups.");
#endif
}
auto index() const -> ssize_t { return p_; }
private:
ssize_t step_ = 0;
size_t mask_;
ssize_t p_;
#ifndef NDEBUG
ssize_t start_;
ssize_t size_;
#endif
};
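// A worked example of the probe sequence (illustrative only, assuming
// `GroupSize == 16` so that the mask for `size == 64` is 48): starting from a
// masked index of 16, successive `Next()` calls produce the group indices
//
//   16 -> 32 -> 0 -> 48
//
// visiting every group exactly once before wrapping, which matches the
// triangular-number formula above once everything is scaled down by
// `GroupSize`.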
// TODO: Evaluate keeping this outlined to see if macro benchmarks observe the
// same perf hit as micro benchmarks.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::LookupEntry(
LookupKeyT lookup_key, KeyContextT key_context) const -> EntryT* {
PrefetchMetadata();
ssize_t local_size = alloc_size_;
CARBON_DCHECK(local_size > 0);
uint8_t* local_metadata = metadata();
HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
EntryT* local_entries = entries();
// Walk through groups of entries using a quadratic probe starting from
// `hash_index`.
ProbeSequence s(hash_index, local_size);
do {
ssize_t group_index = s.index();
// Load the group's metadata and prefetch the entries for this group. The
// prefetch here helps hide key access latency while we're matching the
// metadata.
MetadataGroup g = MetadataGroup::Load(local_metadata, group_index);
EntryT* group_entries = &local_entries[group_index];
PrefetchEntryGroup(group_entries);
// For each group, match the tag against the metadata to extract the
// potentially matching entries within the group.
auto metadata_matched_range = g.Match(tag);
if (LLVM_LIKELY(metadata_matched_range)) {
// If any entries in this group potentially match based on their metadata,
// walk each candidate and compare its key to see if we have definitively
// found a match.
auto byte_it = metadata_matched_range.begin();
auto byte_end = metadata_matched_range.end();
do {
EntryT* entry = byte_it.index_ptr(group_entries);
if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
__builtin_assume(entry != nullptr);
return entry;
}
++byte_it;
} while (LLVM_UNLIKELY(byte_it != byte_end));
}
// We failed to find a matching entry in this bucket, so check if there are
// empty slots as that indicates we're done probing -- no later probed index
// could have a match.
auto empty_byte_matched_range = g.MatchEmpty();
if (LLVM_LIKELY(empty_byte_matched_range)) {
return nullptr;
}
s.Next();
// We use a weird construct of an "unlikely" condition of `true`. The goal
// is to get the compiler to not prioritize the back edge of the loop for
// code layout, and in at least some tests this seems to be an effective
// construct for achieving this.
} while (LLVM_UNLIKELY(true));
}
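// As a sketch of a derived view's forwarding (hypothetical wrapper code; the
// real versions live in `map.h` and `set.h`):
//
//   auto MyMapView::Contains(LookupKeyT key) const -> bool {
//     return this->LookupEntry(key, MyKeyContext{}) != nullptr;
//   }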
// Note that we force inlining here because we expect to be called with lambdas
// that will in turn be inlined to form the loop body. We don't want function
// boundaries within the loop for performance, and the degree of simplification
// from inlining these callbacks may be difficult for the compiler to recognize
// automatically.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename EntryCallbackT, typename GroupCallbackT>
[[clang::always_inline]] auto
ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ForEachEntry(
EntryCallbackT entry_callback, GroupCallbackT group_callback) const
-> void {
uint8_t* local_metadata = metadata();
EntryT* local_entries = entries();
ssize_t local_size = alloc_size_;
for (ssize_t group_index = 0; group_index < local_size;
group_index += GroupSize) {
auto g = MetadataGroup::Load(local_metadata, group_index);
auto present_matched_range = g.MatchPresent();
if (!present_matched_range) {
continue;
}
for (ssize_t byte_index : present_matched_range) {
entry_callback(local_entries[group_index + byte_index]);
}
group_callback(&local_metadata[group_index]);
}
}
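// For example (a hypothetical forwarding wrapper, not part of this header), a
// derived view type's for-each operation could be built on this as:
//
//   view.ForEachEntry(
//       [&](EntryT& entry) { callback(entry.key(), entry.value()); },
//       [](uint8_t* /*group_metadata*/) {});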
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeMetricsImpl(
KeyContextT key_context) const -> Metrics {
uint8_t* local_metadata = metadata();
EntryT* local_entries = entries();
ssize_t local_size = alloc_size_;
Metrics metrics;
// Compute the ones we can directly.
metrics.deleted_count = llvm::count(
llvm::ArrayRef(local_metadata, local_size), MetadataGroup::Deleted);
metrics.storage_bytes = AllocByteSize(local_size);
// We want to process present slots specially to collect metrics on their
// probing behavior.
for (ssize_t group_index = 0; group_index < local_size;
group_index += GroupSize) {
auto g = MetadataGroup::Load(local_metadata, group_index);
auto present_matched_range = g.MatchPresent();
for (ssize_t byte_index : present_matched_range) {
++metrics.key_count;
ssize_t index = group_index + byte_index;
HashCode hash =
key_context.HashKey(local_entries[index].key(), ComputeSeed());
auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
ProbeSequence s(hash_index, local_size);
metrics.probed_key_count +=
static_cast<ssize_t>(s.index() != group_index);
// For each probed key, go through the probe sequence to find both the
// probe distance and how many comparisons are required.
ssize_t distance = 0;
ssize_t compares = 0;
for (; s.index() != group_index; s.Next()) {
auto probe_g = MetadataGroup::Load(local_metadata, s.index());
auto probe_matched_range = probe_g.Match(tag);
compares += std::distance(probe_matched_range.begin(),
probe_matched_range.end());
distance += 1;
}
auto probe_g = MetadataGroup::Load(local_metadata, s.index());
auto probe_matched_range = probe_g.Match(tag);
CARBON_CHECK(!probe_matched_range.empty());
for (ssize_t match_index : probe_matched_range) {
if (match_index >= byte_index) {
// Note we only count the compares that will *fail* as part of
// probing. The last successful compare isn't interesting, it is
// always needed.
break;
}
compares += 1;
}
metrics.probe_avg_distance += distance;
metrics.probe_max_distance =
std::max(metrics.probe_max_distance, distance);
metrics.probe_avg_compares += compares;
metrics.probe_max_compares =
std::max(metrics.probe_max_compares, compares);
}
}
if (metrics.key_count > 0) {
metrics.probe_avg_compares /= metrics.key_count;
metrics.probe_avg_distance /= metrics.key_count;
}
return metrics;
}
// TODO: Evaluate whether it is worth forcing this out-of-line given the
// reasonable ABI boundary it forms and large volume of code necessary to
// implement it.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertImpl(
LookupKeyT lookup_key, KeyContextT key_context)
-> std::pair<EntryT*, bool> {
CARBON_DCHECK(alloc_size() > 0);
PrefetchStorage();
uint8_t* local_metadata = metadata();
HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
// We re-purpose the empty control byte to signal no insert is needed to the
// caller. This is guaranteed to not be a control byte we're inserting.
// constexpr uint8_t NoInsertNeeded = Group::Empty;
ssize_t group_with_deleted_index;
MetadataGroup::MatchIndex deleted_match = {};
EntryT* local_entries = entries();
auto return_insert_at_index = [&](ssize_t index) -> std::pair<EntryT*, bool> {
// We'll need to insert at this index so set the control group byte to the
// proper value.
local_metadata[index] = tag | MetadataGroup::PresentMask;
return {&local_entries[index], true};
};
for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
ssize_t group_index = s.index();
// Load the group's metadata and prefetch the entries for this group. The
// prefetch here helps hide key access latency while we're matching the
// metadata.
auto g = MetadataGroup::Load(local_metadata, group_index);
EntryT* group_entries = &local_entries[group_index];
ViewImplT::PrefetchEntryGroup(group_entries);
auto control_byte_matched_range = g.Match(tag);
if (control_byte_matched_range) {
auto byte_it = control_byte_matched_range.begin();
auto byte_end = control_byte_matched_range.end();
do {
EntryT* entry = byte_it.index_ptr(group_entries);
if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
return {entry, false};
}
++byte_it;
} while (LLVM_UNLIKELY(byte_it != byte_end));
}
// Track the first group with a deleted entry that we could insert over.
if (!deleted_match) {
deleted_match = g.MatchDeleted();
group_with_deleted_index = group_index;
}
// We failed to find a matching entry in this bucket, so check whether there
// are any empty slots. If there are none, we'll continue probing.
auto empty_match = g.MatchEmpty();
if (!empty_match) {
continue;
}
// Ok, we've finished probing without finding anything and need to insert
// instead.
// If we found a deleted slot, we don't need the probe sequence to insert
// so just bail. We want to ensure building up a table is fast so we
// de-prioritize this a bit. In practice this doesn't have too much of an
// effect.
if (LLVM_UNLIKELY(deleted_match)) {
return return_insert_at_index(group_with_deleted_index +
deleted_match.index());
}
// We're going to need to grow by inserting into an empty slot. Check that
// we have the budget for that before we compute the exact index of the
// empty slot. Without the growth budget we'll have to completely rehash and
// so we can just bail here.
if (LLVM_UNLIKELY(growth_budget_ == 0)) {
return {GrowAndInsert(hash, key_context), true};
}
--growth_budget_;
CARBON_DCHECK(growth_budget() >= 0,
"Growth budget shouldn't have gone negative!");
return return_insert_at_index(group_index + empty_match.index());
}
CARBON_FATAL(
"We should never finish probing without finding the entry or an empty "
"slot.");
}
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>