Branch data Line data Source code
1 : : #pragma once
2 : :
3 : : #include "backend/WasmUtil.hpp"
4 : : #include <mutable/parse/AST.hpp>
5 : : #include <optional>
6 : :
7 : : // must be included after Binaryen due to conflicts, e.g. with `::wasm::Throw`
8 : : #include "backend/WasmMacro.hpp"
9 : :
10 : :
11 : : namespace m {
12 : :
13 : : namespace wasm {
14 : :
15 : : // forward declarations
16 : : template<bool IsGlobal> struct ChainedHashTable;
17 : : template<bool IsGlobal, bool ValueInPlace> struct OpenAddressingHashTable;
18 : : struct ProbingStrategy;
19 : :
20 : : /*======================================================================================================================
21 : : * sorting
22 : : *====================================================================================================================*/
23 : :
24 : : /** Sorts the buffer \p buffer using the quicksort algorithm and a branchless binary partition algorithm. The
25 : : * ordering is specified by \p order where the first element is the expression to order on and the second element is
26 : : * `true` iff ordering should be performed ascending. Comparison is performed branchless iff \tparam CmpPredicated. */
27 : : template<bool CmpPredicated, bool IsGlobal>
28 : : void quicksort(Buffer<IsGlobal> &buffer, const std::vector<SortingOperator::order_type> &order);
29 : :
30 : :
31 : : /*======================================================================================================================
32 : : * hashing
33 : : *====================================================================================================================*/
34 : :
35 : : /*----- bit mix functions --------------------------------------------------------------------------------------------*/
36 : :
37 : : /** Mixes the bits of \p bits using the Murmur3 algorithm. */
38 : : U64x1 murmur3_bit_mix(U64x1 bits);
39 : :
40 : :
41 : : /*----- hash functions -----------------------------------------------------------------------------------------------*/
42 : :
43 : : /** Hashes \p num_bytes bytes of \p bytes using the FNV-1a algorithm. */
44 : : U64x1 fnv_1a(Ptr<U8x1> bytes, U32x1 num_bytes);
45 : : /** Hashes the string \p str. */
46 : : U64x1 str_hash(NChar str);
47 : : /** Hashes the elements of \p values where the first element is the type of the value to hash and the second element
48 : : * is the value itself using the Murmur3-64a algorithm. */
49 : : U64x1 murmur3_64a_hash(std::vector<std::pair<const Type*, SQL_t>> values);
50 : :
51 : :
52 : : /*----- hash tables --------------------------------------------------------------------------------------------------*/
53 : :
54 : : /** Hash table to hash key-value pairs in memory. */
55 : : struct HashTable
56 : : {
57 : : using index_t = std::size_t;
58 : : using offset_t = int32_t;
59 : : using size_t = uint32_t;
60 : :
61 : : // forward declaration
62 : : template<bool IsConst> struct the_entry;
63 : :
64 : : /** Helper struct as proxy to access a single value (inclusive NULL bit) of a hash table entry. */
65 : : template<sql_type T, bool IsConst>
66 [ # # # # : 0 : struct the_reference;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
67 : :
68 : : template<sql_type T, bool IsConst>
69 : : requires (not std::same_as<T, NChar>) and (T::num_simd_lanes == 1)
70 : : struct the_reference<T, IsConst>
71 : : {
72 : : friend struct the_entry<false>;
73 : : friend struct the_entry<true>;
74 : :
75 : : using value_t = PrimitiveExpr<typename T::type>;
76 : :
77 : : private:
78 : : Ptr<value_t> value_;
79 : : std::optional<Ptr<U8x1>> is_null_byte_;
80 : : std::optional<U8x1> is_null_mask_;
81 : :
82 : 0 : explicit the_reference(Ptr<value_t> value, std::optional<Ptr<U8x1>> is_null_byte, std::optional<U8x1> is_null_mask)
83 : 0 : : value_(value)
84 [ # # # # : 0 : , is_null_byte_(std::move(is_null_byte))
# # # # #
# # # #
# ]
85 [ # # # # : 0 : , is_null_mask_(std::move(is_null_mask))
# # # # #
# # # #
# ]
86 : : {
87 [ # # # # : 0 : M_insist(bool(is_null_byte_) == bool(is_null_mask_),
# # # # #
# # # #
# ]
88 : : "either both or none of NULL byte and NULL mask must be specified");
89 : 0 : }
90 : 0 : explicit the_reference(Ptr<value_t> value, Ptr<U8x1> is_null_byte, U8x1 is_null_mask)
91 : 0 : : value_(value)
92 [ # # # # : 0 : , is_null_byte_(is_null_byte)
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
93 [ # # # # : 0 : , is_null_mask_(is_null_mask)
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
94 : 0 : { }
95 : :
96 : : public:
97 : 0 : explicit the_reference(Ptr<value_t> value) : value_(value) { }
98 : 0 : explicit the_reference(Ptr<value_t> value, Ptr<void> null_bitmap, uint8_t null_bit_offset)
99 : 0 : : value_(value)
100 [ # # # # : 0 : , is_null_byte_((null_bitmap + (null_bit_offset / 8)).to<uint8_t*>())
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # #
# ]
101 [ # # # # : 0 : , is_null_mask_(1U << (null_bit_offset % 8))
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
102 : 0 : { }
103 : :
104 : : ///> Discards `this`.
105 : 0 : void discard() {
106 : 0 : value_.discard();
107 [ # # # # : 0 : if (is_null_byte_) {
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
108 : 0 : is_null_byte_->discard();
109 : 0 : is_null_mask_->discard();
110 : 0 : }
111 : 0 : }
112 : : ///> Returns a *deep copy* of `this`.
113 : 0 : the_reference clone() const {
114 [ # # # # : 0 : if (is_null_byte_)
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
115 [ # # # # : 0 : return the_reference(value_.clone(), is_null_byte_->clone(), is_null_mask_->clone());
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
116 : : else
117 [ # # # # : 0 : return the_reference(value_.clone());
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
118 : 0 : }
119 : :
120 : : ///> Assigns `this` to \p _value.
121 : 0 : void operator=(T _value) requires (not IsConst) {
122 : 0 : M_insist(bool(is_null_byte_) == _value.can_be_null(), "value of non-nullable entry must not be nullable");
123 [ # # # # : 0 : if (is_null_byte_) {
# # # # #
# # # #
# ]
124 : 0 : auto [value, is_null] = _value.split();
125 [ # # # # : 0 : *value_ = value;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
126 [ # # # # : 0 : setbit(*is_null_byte_, is_null, *is_null_mask_);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
127 : 0 : } else {
128 [ # # # # : 0 : *value_ = _value.insist_not_null();
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
129 : : }
130 : 0 : }
131 : : ///> Assigns the value of `this` to \p value w/o updating the NULL bit.
132 : 0 : void set_value(value_t value) requires (not IsConst) {
133 [ # # # # : 0 : *value_ = value;
# # # # #
# # # ]
134 [ # # # # : 0 : if (is_null_byte_) {
# # # # #
# # # ]
135 : 0 : is_null_byte_->discard();
136 : 0 : is_null_mask_->discard();
137 : 0 : }
138 : 0 : }
139 : :
140 : : ///> Assigns the NULL bit of `this` to \p is_null. Does not update/modify the value of `this`.
141 : 0 : void set_null_bit(Boolx1 is_null) requires (not IsConst) {
142 : 0 : value_.discard();
143 : 0 : M_insist(bool(is_null_byte_));
144 : 0 : M_insist(bool(is_null_mask_));
145 [ # # # # : 0 : setbit(*is_null_byte_, is_null, *is_null_mask_);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
146 : 0 : }
147 : : ///> Sets `this` to NULL. Does not modify the value of `this`.
148 : 0 : void set_null() requires (not IsConst) {
149 : 0 : value_.discard();
150 : 0 : M_insist(bool(is_null_byte_));
151 : 0 : M_insist(bool(is_null_mask_));
152 [ # # # # : 0 : **is_null_byte_ = **is_null_byte_ bitor *is_null_mask_;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
153 : 0 : }
154 : : ///> Sets `this` to NOT NULL. Does not modify the value of `this`.
155 : 0 : void set_not_null() requires (not IsConst) {
156 : 0 : value_.discard();
157 : 0 : M_insist(bool(is_null_byte_));
158 : 0 : M_insist(bool(is_null_mask_));
159 [ # # # # : 0 : **is_null_byte_ = **is_null_byte_ bitand ~*is_null_mask_;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
160 : 0 : }
161 : :
162 : : ///> Compares `this` with \p _value.
163 : 0 : Boolx1 operator==(T _value) {
164 : 0 : auto [value, is_null] = _value.split();
165 [ # # # # : 0 : if (is_null_byte_) {
# # # # #
# # # #
# ]
166 [ # # # # : 0 : auto is_null_ = (**is_null_byte_ bitand *is_null_mask_).template to<bool>();
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
167 [ # # # # : 0 : auto equal_nulls = is_null_.clone() == is_null;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
168 [ # # # # : 0 : return equal_nulls and (is_null_ or *value_ == value);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
169 : 0 : } else {
170 [ # # # # : 0 : return not is_null and *value_ == value;
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
171 : : }
172 : 0 : }
173 : :
174 : : ///> Loads the value of `this`.
175 : 0 : operator T() {
176 [ # # # # : 0 : if (is_null_byte_)
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
177 [ # # # # : 0 : return T(*value_, (**is_null_byte_ bitand *is_null_mask_).template to<bool>());
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
178 : : else
179 [ # # # # : 0 : return T(*value_);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
180 : 0 : }
181 : :
182 : : ///> Selects either the reference \p tru or \p fals depending on the value of \p cond.
183 : 0 : friend the_reference Select(Boolx1 cond, the_reference tru, the_reference fals) {
184 : 0 : M_insist(bool(tru.is_null_byte_) == bool(fals.is_null_byte_), "null byte mismatch");
185 : 0 : M_insist(bool(tru.is_null_mask_) == bool(fals.is_null_mask_), "null mask mismatch");
186 [ # # # # : 0 : if (tru.is_null_byte_) {
# # # # #
# # # ]
187 [ # # # # : 0 : the_reference r(
# # # # #
# # # ]
188 [ # # # # : 0 : /* value_= */ Select(cond.clone(), tru.value_, fals.value_),
# # # # #
# # # ]
189 [ # # # # : 0 : /* is_null_byte_= */ Select(cond.clone(), *tru.is_null_byte_, *fals.is_null_byte_),
# # # # #
# # # # #
# # # # #
# # # #
# ]
190 [ # # # # : 0 : /* is_null_mask_= */ Select(cond.clone(), *tru.is_null_mask_, *fals.is_null_mask_)
# # # # #
# # # # #
# # # # #
# # # #
# ]
191 : : );
192 [ # # # # : 0 : cond.discard();
# # # # #
# # # ]
193 : 0 : return r;
194 [ # # # # : 0 : } else {
# # # # #
# # # ]
195 [ # # # # : 0 : return the_reference(Select(cond, tru.value_, fals.value_));
# # # # #
# # # ]
196 : : }
197 : 0 : }
198 : : };
199 : :
200 : : template<bool IsConst>
201 : : struct the_reference<NChar, IsConst>
202 : : {
203 : : friend struct the_entry<false>;
204 : : friend struct the_entry<true>;
205 : :
206 : : private:
207 : : NChar addr_;
208 : : std::optional<Ptr<U8x1>> is_null_byte_;
209 : : std::optional<U8x1> is_null_mask_;
210 : :
211 : 0 : explicit the_reference(NChar addr, std::optional<Ptr<U8x1>> is_null_byte, std::optional<U8x1> is_null_mask)
212 : 0 : : addr_(addr)
213 [ # # ]: 0 : , is_null_byte_(std::move(is_null_byte))
214 [ # # ]: 0 : , is_null_mask_(std::move(is_null_mask))
215 : : {
216 [ # # ]: 0 : M_insist(addr.can_be_null() == bool(is_null_byte_), "nullable entry must have a NULL bit");
217 [ # # ]: 0 : M_insist(bool(is_null_byte_) == bool(is_null_mask_),
218 : : "either both or none of NULL byte and NULL mask must be specified");
219 : 0 : }
220 : 0 : explicit the_reference(NChar addr, Ptr<U8x1> is_null_byte, U8x1 is_null_mask)
221 : 0 : : addr_(addr)
222 [ # # ]: 0 : , is_null_byte_(is_null_byte)
223 [ # # ]: 0 : , is_null_mask_(is_null_mask)
224 : : {
225 [ # # ]: 0 : M_insist(addr.can_be_null(), "entry with NULL bit must be nullable");
226 : 0 : }
227 : :
228 : : public:
229 : 0 : explicit the_reference(NChar addr)
230 : 0 : : addr_(addr)
231 : : {
232 [ # # # # ]: 0 : M_insist(not addr.can_be_null(), "entry without NULL bit must be nullable"); \
233 : 0 : }
234 : 0 : explicit the_reference(NChar addr, Ptr<void> null_bitmap, uint8_t null_bit_offset)
235 : 0 : : addr_(addr)
236 [ # # # # : 0 : , is_null_byte_((null_bitmap + (null_bit_offset / 8)).to<uint8_t*>())
# # # # #
# # # ]
237 [ # # # # ]: 0 : , is_null_mask_(1U << (null_bit_offset % 8))
238 : : {
239 [ # # # # ]: 0 : M_insist(addr.can_be_null(), "entry with NULL bit must be nullable");
240 : 0 : }
241 : :
242 : : ///> Discards `this`.
243 : 0 : void discard() {
244 : 0 : addr_.discard();
245 [ # # # # ]: 0 : if (addr_.can_be_null()) {
246 : 0 : is_null_byte_->discard();
247 : 0 : is_null_mask_->discard();
248 : 0 : }
249 : 0 : }
250 : : ///> Returns a *deep copy* of `this`.
251 : 0 : the_reference clone() const {
252 [ # # ]: 0 : if (addr_.can_be_null())
253 [ # # # # : 0 : return the_reference(addr_.clone(), is_null_byte_->clone(), is_null_mask_->clone());
# # ]
254 : : else
255 [ # # ]: 0 : return the_reference(addr_.clone());
256 : 0 : }
257 : :
258 : : ///> Assigns `this` to the string stored at \p addr.
259 : 0 : void operator=(NChar addr) requires (not IsConst) {
260 [ # # ]: 0 : M_insist(addr_.length() == addr.length() and
261 : : addr_.guarantees_terminating_nul() == addr.guarantees_terminating_nul(),
262 : : "type mismatch");
263 [ # # ]: 0 : if (addr_.can_be_null()) {
264 [ # # # # : 0 : IF (not addr.clone().is_null()) {
# # ]
265 [ # # # # : 0 : strncpy(addr_, addr.clone(), U32x1(addr.size_in_bytes())).discard();
# # # # #
# # # ]
266 : 0 : };
267 [ # # # # : 0 : setbit(*is_null_byte_, addr.is_null(), *is_null_mask_);
# # ]
268 : 0 : } else {
269 [ # # # # ]: 0 : Wasm_insist(addr.clone().not_null(), "value of non-nullable entry must not be nullable");
270 [ # # # # : 0 : strncpy(addr_, addr, U32x1(addr.size_in_bytes())).discard();
# # # # #
# ]
271 : : }
272 : 0 : }
273 : : ///> Assigns the value of `this` to the string stored at \p addr w/o updating the NULL bit.
274 : : void set_value(NChar addr) requires (not IsConst) {
275 : : M_insist(addr_.length() == addr.length() and
276 : : addr_.guarantees_terminating_nul() == addr.guarantees_terminating_nul(),
277 : : "type mismatch");
278 : : strncpy(addr_, addr, U32x1(addr.size_in_bytes())).discard();
279 : : if (addr_.can_be_null()) {
280 : : is_null_byte_->discard();
281 : : is_null_mask_->discard();
282 : : }
283 : : }
284 : : ///> Assigns the NULL bit of `this` to \p is_null. Does not update/modify the value of `this`.
285 : : void set_null_bit(Boolx1 is_null) requires (not IsConst) {
286 : : addr_.discard();
287 : : M_insist(bool(is_null_byte_));
288 : : M_insist(bool(is_null_mask_));
289 : : setbit(*is_null_byte_, is_null, *is_null_mask_);
290 : : }
291 : : ///> Sets `this` to NULL. Does not modify the value of `this`.
292 : : void set_null() requires (not IsConst) {
293 : : addr_.discard();
294 : : M_insist(bool(is_null_byte_));
295 : : M_insist(bool(is_null_mask_));
296 : : **is_null_byte_ = **is_null_byte_ bitor *is_null_mask_;
297 : : }
298 : : ///> Sets `this` to NOT NULL. Does not modify the value of `this`.
299 : : void set_not_null() requires (not IsConst) {
300 : : addr_.discard();
301 : : M_insist(bool(is_null_byte_));
302 : : M_insist(bool(is_null_mask_));
303 : : **is_null_byte_ = **is_null_byte_ bitand ~*is_null_mask_;
304 : : }
305 : :
306 : : ///> Compares `this` with the string stored at \p _addr.
307 : 0 : Boolx1 operator==(NChar _addr) {
308 [ # # ]: 0 : M_insist(addr_.length() == _addr.length() and
309 : : addr_.guarantees_terminating_nul() == _addr.guarantees_terminating_nul(),
310 : : "type mismatch");
311 : 0 : auto [addr, is_nullptr] = _addr.split();
312 [ # # # # : 0 : _Boolx1 _equal_addrs = strncmp(addr_, NChar(addr, _addr.can_be_null(), addr_.length(),
# # # # ]
313 [ # # ]: 0 : addr_.guarantees_terminating_nul()),
314 [ # # ]: 0 : U32x1(addr_.length()), EQ);
315 [ # # ]: 0 : auto [equal_addrs_val, equal_addrs_is_null] = _equal_addrs.split();
316 [ # # ]: 0 : equal_addrs_is_null.discard(); // use potentially-null value but it is overruled if it is invalid, i.e. NULL
317 [ # # ]: 0 : if (addr_.can_be_null()) {
318 [ # # # # : 0 : auto is_nullptr_ = (**is_null_byte_ bitand *is_null_mask_).template to<bool>();
# # ]
319 [ # # # # : 0 : auto equal_nulls = is_nullptr_.clone() == is_nullptr;
# # ]
320 [ # # # # : 0 : return equal_nulls and (is_nullptr_ or equal_addrs_val);
# # ]
321 : 0 : } else {
322 [ # # # # : 0 : return not is_nullptr and equal_addrs_val;
# # ]
323 : : }
324 : 0 : }
325 : :
326 : : ///> Loads the value of `this`.
327 : 0 : operator NChar() {
328 [ # # ]: 0 : if (addr_.can_be_null()) {
329 [ # # # # ]: 0 : Boolx1 is_null = (**is_null_byte_ bitand *is_null_mask_).template to<bool>();
330 [ # # # # : 0 : return NChar(Select(is_null, Ptr<Charx1>::Nullptr(), addr_.val()), /* can_be_null= */ true,
# # # # ]
331 : 0 : addr_.length(), addr_.guarantees_terminating_nul());
332 : 0 : } else {
333 : 0 : return addr_;
334 : : }
335 : 0 : }
336 : :
337 : : ///> Selects either the reference \p tru or \p fals depending on the value of \p cond.
338 : : friend the_reference Select(Boolx1 cond, the_reference tru, the_reference fals) {
339 : : M_insist(tru.addr_.can_be_null() == fals.addr_.can_be_null(), "nullable mismatch");
340 : : M_insist(tru.addr_.length() == fals.addr_.length() and
341 : : tru.addr_.guarantees_terminating_nul() == fals.addr_.guarantees_terminating_nul(),
342 : : "type mismatch");
343 : : M_insist(bool(tru.is_null_byte_) == bool(fals.is_null_byte_), "null byte mismatch");
344 : : M_insist(bool(tru.is_null_mask_) == bool(fals.is_null_mask_), "null mask mismatch");
345 : : if (tru.addr_.can_be_null()) {
346 : : the_reference r(
347 : : /* addr_= */ NChar(Select(cond.clone(), tru.addr_, fals.addr_), /* can_be_null= */ true,
348 : : tru.addr_.length(), tru.addr_.guarantees_terminating_nul()),
349 : : /* is_null_byte_= */ Select(cond.clone(), *tru.is_null_byte_, *fals.is_null_byte_),
350 : : /* is_null_mask_= */ Select(cond.clone(), *tru.is_null_mask_, *fals.is_null_mask_)
351 : : );
352 : : cond.discard();
353 : : return r;
354 : : } else {
355 : : return the_reference(NChar(Select(cond, tru.addr_, fals.addr_), /* can_be_null= */ false,
356 : : tru.addr_.length(), tru.addr_.guarantees_terminating_nul()));
357 : : }
358 : : }
359 : : };
360 : :
361 : : template<sql_type T>
362 : : using reference_t = the_reference<T, false>;
363 : : template<sql_type T>
364 : : using const_reference_t = the_reference<T, true>;
365 : :
366 : : /** Helper struct as proxy to access a hash table entry. */
367 : : template<bool IsConst>
368 : : struct the_entry
369 : : {
370 : : friend struct HashTable;
371 : :
372 : : using value_t = std::variant<
373 : : std::monostate
374 : : #define ADD_TYPE(TYPE) , the_reference<TYPE, IsConst>
375 : : SQL_TYPES_SCALAR(ADD_TYPE)
376 : : #undef ADD_TYPE
377 : : >;
378 : :
379 : : private:
380 : : ///> Discards the held reference of \p ref.
381 : 0 : static void discard(value_t &ref) {
382 : 0 : std::visit(overloaded {
383 : 0 : [](auto &r) { r.discard(); },
384 : 0 : [](std::monostate) { M_unreachable("invalid variant"); },
385 : 0 : }, ref);
386 : 0 : }
387 : :
388 : : private:
389 : : ///> maps `Schema::Identifier`s to `the_reference<T, IsConst>`s that reference the stored expression,
390 : : ///> allows for duplicates similar to `Schema`
391 : : std::unordered_multimap<Schema::Identifier, value_t> refs_;
392 : :
393 : : public:
394 : 0 : the_entry() = default;
395 : : the_entry(const the_entry&) = delete;
396 : 0 : the_entry(the_entry&&) = default;
397 : :
398 : : private:
399 : : ///> constructs an entry from references of the *opposite* const-ness
400 : 0 : the_entry(std::unordered_multimap<Schema::Identifier, typename the_entry<not IsConst>::value_t> refs) {
401 [ # # ]: 0 : refs_.reserve(refs.size());
402 [ # # ]: 0 : for (auto &p : refs) {
403 [ # # # # ]: 0 : std::visit(overloaded {
404 : 0 : [&]<typename T>(the_reference<T, not IsConst> &r) {
405 [ # # # # : 0 : the_reference<T, IsConst> ref(
# # # # #
# # # #
# ]
406 [ # # # # : 0 : r.value_, std::move(r.is_null_byte_), std::move(r.is_null_mask_)
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
407 : : );
408 [ # # # # : 0 : refs_.emplace(std::move(p.first), std::move(ref));
# # # # #
# # # #
# ]
409 : 0 : },
410 : 0 : [&](the_reference<NChar, not IsConst> &r) {
411 [ # # ]: 0 : the_reference<NChar, IsConst> ref(
412 [ # # # # ]: 0 : r.addr_, std::move(r.is_null_byte_), std::move(r.is_null_mask_)
413 : : );
414 [ # # ]: 0 : refs_.emplace(std::move(p.first), std::move(ref));
415 : 0 : },
416 : 0 : [](std::monostate) { M_unreachable("invalid variant"); },
417 : 0 : }, p.second);
418 : : }
419 : 0 : }
420 : :
421 : : public:
422 : 0 : ~the_entry() {
423 [ # # # # ]: 0 : for (auto &p : refs_)
424 [ # # # # ]: 0 : discard(p.second);
425 : 0 : }
426 : :
427 : : ///> Converts a non-const entry to a const entry. The former entry is moved, i.e. must not be used afterwards.
428 [ # # ]: 0 : operator the_entry<true>() requires (not IsConst) { return the_entry<true>(std::move(refs_)); }
429 : :
430 : : ///> Returns `true` iff `this` does not contain any references.
431 : 0 : bool empty() const { return refs_.empty(); }
432 : :
433 : : ///> Returns `true` iff `this` contains identifier \p id.
434 : 0 : bool has(const Schema::Identifier &id) const { return refs_.contains(id); }
435 : :
436 : : ///> Adds a mapping from identifier \p id to reference \p ref.
437 : : template<sql_type T>
438 : 0 : void add(Schema::Identifier id, the_reference<T, IsConst> &&ref) {
439 : 0 : refs_.emplace(std::move(id), std::forward<the_reference<T, IsConst>>(ref));
440 : 0 : }
441 : :
442 : : ///> Returns the **moved** entry for identifier \p id.
443 : 0 : value_t extract(const Schema::Identifier &id) {
444 : 0 : auto it = refs_.find(id);
445 : 0 : M_insist(it != refs_.end(), "identifier not found");
446 : 0 : auto nh = refs_.extract(it);
447 [ # # # # ]: 0 : return std::move(nh.mapped());
448 : 0 : }
449 : : ///> Returns the **moved** entry for identifier \p id.
450 : : template<sql_type T>
451 : 0 : the_reference<T, IsConst> extract(const Schema::Identifier &id) {
452 : : using type = the_reference<T, IsConst>;
453 : 0 : M_insist(refs_.count(id) <= 1, "duplicate identifier, lookup ambiguous");
454 : 0 : auto it = refs_.find(id);
455 : 0 : M_insist(it != refs_.end(), "identifier not found");
456 : 0 : auto nh = refs_.extract(it);
457 [ # # # # : 0 : M_insist(std::holds_alternative<type>(nh.mapped()));
# # # # #
# # # # #
# # ]
458 [ # # # # : 0 : return std::move(*std::get_if<type>(&nh.mapped()));
# # # # #
# # # # #
# # ]
459 : 0 : }
460 : :
461 : : ///> Returns the **copied** entry for identifier \p id.
462 : 0 : value_t get(const Schema::Identifier &id) const {
463 : 0 : M_insist(refs_.count(id) <= 1, "duplicate identifier, lookup ambiguous");
464 : 0 : auto it = refs_.find(id);
465 : 0 : M_insist(it != refs_.end(), "identifier not found");
466 : 0 : return std::visit(overloaded {
467 [ # # # # : 0 : [](auto &r) -> value_t { return r.clone(); },
# # # # #
# # # # #
# # ]
468 : 0 : [](std::monostate) -> value_t { M_unreachable("invalid reference"); },
469 : 0 : }, it->second);
470 : : }
471 : : ///> Returns the **copied** entry for identifier \p id.
472 : : template<sql_type T>
473 : 0 : the_reference<T, IsConst> get(const Schema::Identifier &id) const {
474 : : using type = the_reference<T, IsConst>;
475 : 0 : M_insist(refs_.count(id) <= 1, "duplicate identifier, lookup ambiguous");
476 : 0 : auto it = refs_.find(id);
477 : 0 : M_insist(it != refs_.end(), "identifier not found");
478 : 0 : M_insist(std::holds_alternative<type>(it->second));
479 : 0 : return std::get_if<type>(&it->second)->clone();
480 : : }
481 : : };
482 : :
483 : : using entry_t = the_entry<false>;
484 : : using const_entry_t = the_entry<true>;
485 : :
486 : : using callback_t = std::function<void(const_entry_t)>;
487 : : using hint_t = std::optional<Ptr<void>>;
488 : :
489 : : protected:
490 : : ///> Copies the vector of `SQL_t`s \p values.
491 : 0 : static std::vector<SQL_t> clone(const std::vector<SQL_t> &values) {
492 : 0 : std::vector<SQL_t> cpy;
493 [ # # ]: 0 : cpy.reserve(values.size());
494 [ # # ]: 0 : for (auto &value : values) {
495 [ # # ]: 0 : std::visit(overloaded {
496 [ # # # # : 0 : [&](auto &v) { cpy.push_back(v.clone()); },
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
497 : 0 : [](std::monostate) { M_unreachable("invalid variant"); },
498 : 0 : }, value);
499 : : }
500 : 0 : return cpy;
501 [ # # ]: 0 : }
502 : :
503 : : protected:
504 : : std::reference_wrapper<const Schema> schema_; ///< schema of hash table
505 : : std::vector<index_t> key_indices_; ///< keys of hash table
506 : : std::vector<index_t> value_indices_; ///< values of hash table
507 : :
508 : : public:
509 : : HashTable() = delete;
510 : :
511 : : /** Creates a hash table with schema \p schema and keys at \p key_indices. */
512 : 0 : HashTable(const Schema &schema, std::vector<index_t> key_indices)
513 : 0 : : schema_(std::cref(schema))
514 : 0 : , key_indices_(std::move(key_indices))
515 : 0 : {
516 [ # # # # ]: 0 : M_insist(schema.num_entries() != 0, "hash table schema must not be empty");
517 [ # # ]: 0 : M_insist(not key_indices_.empty(), "must specify a key");
518 : :
519 : : /*----- Compute `value_indices_` as complement of `key_indices_`. -----*/
520 [ # # # # ]: 0 : for (index_t i = 0; i != schema.num_entries(); ++i) {
521 [ # # # # ]: 0 : if (contains(key_indices_, i)) continue;
522 [ # # ]: 0 : value_indices_.push_back(i);
523 : 0 : }
524 : 0 : }
525 : :
526 : : HashTable(const HashTable&) = delete;
527 : : HashTable(HashTable&&) = default;
528 : :
529 : 0 : virtual ~HashTable() { }
530 : :
531 : : const Schema & schema() const { return schema_; }
532 : :
533 : : /** Performs the setup of the hash table. Must be called before any call to a setup method, i.e. setting the
534 : : * high watermark, or an access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
535 : : virtual void setup() = 0;
536 : : /** Performs the teardown of the hash table. Must be called after all calls to a setup method, i.e. setting the
537 : : * high watermark, or an access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
538 : : virtual void teardown() = 0;
539 : :
540 : : /** Sets the high watermark, i.e. the fraction of occupied entries before growing the hash table is required, to
541 : : * \p percentage. */
542 : : virtual void set_high_watermark(double percentage) = 0;
543 : :
544 : : /** Clears the hash table. */
545 : : virtual void clear() = 0;
546 : :
547 : : /** Computes the bucket for key \p key. Often used as hint for `find()` and `for_each_in_equal_range()`. */
548 : : virtual Ptr<void> compute_bucket(std::vector<SQL_t> key) const = 0;
549 : :
550 : : /** Inserts an entry into the hash table with key \p key regardless whether it already exists, i.e. duplicates
551 : : * are allowed. Returns a handle to the newly inserted entry which may be used to write the values for this
552 : : * entry. Rehashing of the hash table may be performed. Predication is supported, i.e. an entry is always
553 : : * inserted but can only be found later iff the predication predicate is fulfilled. */
554 : : virtual entry_t emplace(std::vector<SQL_t> key) = 0;
555 : : /** If no entry with key \p key already exists, inserts one into the hash table, i.e. no duplicates are inserted.
556 : : * Returns a pair of a handle to the entry with the given key which may be used to write the values for this
557 : : * entry and a boolean flag to indicate whether an insertion was performed. Rehashing of the hash table may be
558 : : * performed. Predication is supported, i.e. an entry is always inserted but can only be found later iff the
559 : : * predication predicate is fulfilled. */
560 : : virtual std::pair<entry_t, Boolx1> try_emplace(std::vector<SQL_t> key) = 0;
561 : :
562 : : /** Tries to find an entry with key \p key in the hash table. Uses \p bucket_hint as hint to the bucket address.
563 : : * Returns a pair of a handle to the found entry which may be used to both read and write the values of this entry
564 : : * (if none is found this handle points to an arbitrary entry and should be ignored) and a boolean flag to indicate
565 : : * whether an element with the specified key was found. Predication is supported, i.e. if the predication predicate
566 : : * is not fulfilled, no entry will be found. */
567 : : virtual std::pair<entry_t, Boolx1> find(std::vector<SQL_t> key, hint_t bucket_hint = hint_t()) = 0;
568 : : /** Tries to find an entry with key \p key in the hash table. Uses \p bucket_hint as hint to the bucket address.
569 : : * Returns a pair of a handle to the found entry which may be used to only read the values of this entry (if none is
570 : : * found this handle points to an arbitrary entry and should be ignored) and a boolean flag to indicate whether an
571 : : * element with the specified key was found. Predication is supported, i.e. if the predication predicate is not
572 : : * fulfilled, no entry will be found. */
573 : : std::pair<const_entry_t, Boolx1> find(std::vector<SQL_t> key, hint_t bucket_hint = hint_t()) const {
574 : : return const_cast<HashTable*>(this)->find(std::move(key), std::move(bucket_hint));
575 : : }
576 : :
577 : : /** Calls \p Pipeline for each entry contained in the hash table. At each call the argument is a handle to the
578 : : * respective entry which may be used to read both the keys and the values of this entry. */
579 : : virtual void for_each(callback_t Pipeline) const = 0;
580 : : /** Calls \p Pipeline for each entry with key \p key in the hash table, where the key comparison is performed
581 : : * predicated iff \p predicated is set. Uses \p bucket_hint as hint to the bucket address. At each call the
582 : : * argument is a handle to the respective entry which may be used to read both the keys and the values of this
583 : : * entry. Predication is supported, i.e. if the predication predicate is not fulfilled, the range of entries with
584 : : * an equal key will be empty. */
585 : : virtual void for_each_in_equal_range(std::vector<SQL_t> key, callback_t Pipeline, bool predicated = false,
586 : : hint_t bucket_hint = hint_t()) const = 0;
587 : :
588 : : /** Returns a handle to a newly created dummy entry which may be used to write the values for this entry.
589 : : * Note that even if the hash table is globally visible, this entry can only be used *locally*, i.e. within the
590 : : * function it is created in. */
591 : : virtual entry_t dummy_entry() = 0;
592 : :
593 : : protected:
594 : : /** Sets the byte offsets of an entry containing values of types \p types in \p offsets_in_bytes with the starting
595 : : * offset at \p initial_offset_in_bytes and an initial alignment requirement of \p initial_max_alignment_in_bytes.
596 : : * To minimize padding, the values are sorted by their alignment requirement. Returns the byte size of an entry
597 : : * and its alignments requirement in bytes. */
598 : : std::pair<size_t, size_t> set_byte_offsets(std::vector<offset_t> &offsets_in_bytes,
599 : : const std::vector<const Type*> &types,
600 : : offset_t initial_offset_in_bytes = 0,
601 : : offset_t initial_max_alignment_in_bytes = 1);
602 : : };
603 : :
604 : :
605 : : /*----- chained hash tables ------------------------------------------------------------------------------------------*/
606 : :
607 : : template<bool IsGlobal>
608 : : class chained_hash_table_storage;
609 : :
610 : : template<>
611 : : class chained_hash_table_storage<false> {};
612 : :
613 : : template<>
614 [ # # # # ]: 0 : class chained_hash_table_storage<true>
615 : : {
616 : : friend struct ChainedHashTable<true>;
617 : :
618 : : Global<Ptr<void>> address_; ///< global backup for address of hash table
619 : : Global<U32x1> mask_; ///< global backup for mask of hash table
620 : : Global<U32x1> num_entries_; ///< global backup for number of occupied entries of hash table
621 : : Global<U32x1> high_watermark_absolute_; ///< global backup for absolute high watermark of hash table
622 : : };
623 : :
624 : : template<bool IsGlobal>
625 : : struct ChainedHashTable : HashTable
626 : : {
627 : : private:
628 : : ///> variable type dependent on whether the hash table should be globally usable
629 : : template<typename T>
630 : : using var_t = std::conditional_t<IsGlobal, Global<T>, Var<T>>;
631 : :
632 : : HashTable::size_t entry_size_in_bytes_; ///< entry size in bytes
633 : : HashTable::size_t entry_max_alignment_in_bytes_; ///< alignment requirement in bytes of a single entry
634 : : std::vector<HashTable::offset_t> entry_offsets_in_bytes_; ///< entry offsets, i.e. offsets of keys and values
635 : : ///> offset of NULL bitmap; only specified if at least one entry is nullable
636 : : HashTable::offset_t null_bitmap_offset_in_bytes_;
637 : : HashTable::offset_t ptr_offset_in_bytes_; ///< offset of pointer to next entry in linked collision list
638 : :
639 : : std::optional<Var<Ptr<void>>> address_; ///< base address of hash table
640 : : ///> mask of hash table, i.e. number of buckets / collision lists minus 1; always a power of 2 minus 1
641 : : std::optional<Var<U32x1>> mask_;
642 : : std::optional<Var<U32x1>> num_entries_; ///< number of occupied entries of hash table
643 : : double high_watermark_percentage_ = 1.0; ///< fraction of occupied entries before growing the hash table is required
644 : : ///> maximum number of entries before growing the hash table is required
645 : : std::optional<Var<U32x1>> high_watermark_absolute_;
646 : : ///> if `IsGlobal`, contains backups for address, capacity, number of entries, and absolute high watermark
647 : : chained_hash_table_storage<IsGlobal> storage_;
648 : : ///> function to perform rehashing; only possible for global hash tables since variables have to be updated
649 : : std::optional<FunctionProxy<void(void)>> rehash_;
650 : : std::vector<std::pair<Ptr<void>, U32x1>> dummy_allocations_; ///< address-size pairs of dummy entry allocations
651 : : std::optional<var_t<Ptr<void>>> predication_dummy_; ///< dummy bucket used for predication
652 : :
653 : : public:
654 : : /** Creates a chained hash table with schema \p schema, keys at \p key_indices, and an initial capacity
655 : : * for \p initial_capacity buckets, i.e. collision lists. Emits code to allocate a fresh hash table. The hash
656 : : * table is globally visible iff \tparam IsGlobal. */
657 : : ChainedHashTable(const Schema &schema, std::vector<HashTable::index_t> key_indices, uint32_t initial_capacity);
658 : :
659 : : ChainedHashTable(ChainedHashTable&&) = default;
660 : :
661 : : ~ChainedHashTable();
662 : :
663 : : private:
664 : : /** Returns the address of the first bucket, i.e. the pointer to the first collision list. */
665 : 0 : Ptr<void> begin() const { M_insist(bool(address_), "must call `setup()` before"); return *address_; }
666 : : /** Returns the address of the past-the-end bucket. */
667 [ # # # # : 0 : Ptr<void> end() const { return begin() + size_in_bytes().make_signed(); }
# # # # #
# # # ]
668 : : /** Returns the mask of the hash table, i.e. capacity - 1U, which can be used to mask a hash value into the range
669 : : * of the hash table. */
670 : 0 : U32x1 mask() const { M_insist(bool(mask_), "must call `setup()` before"); return *mask_; }
671 : : /** Returns the capacity of the hash table. */
672 [ # # # # ]: 0 : U32x1 capacity() const { return mask() + 1U; }
673 : : /** Returns the overall size in bytes of the actual hash table, i.e. without collision list entries. */
674 [ # # # # ]: 0 : U32x1 size_in_bytes() const { return capacity() * uint32_t(sizeof(uint32_t)); }
675 : :
676 : : public:
677 : : /** Performs the setup of all local variables of the hash table (by reading them from the global backups iff
678 : : * \tparam IsGlobal). Must be called before any call to a setup method, i.e. setting the high watermark, or an
679 : : * access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
680 : : void setup() override;
681 : : /** Performs the teardown of all local variables of the hash table (by storing them into the global backups iff
682 : : * \tparam IsGlobal). Must be called after all calls to a setup method, i.e. setting the high watermark, or an
683 : : * access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
684 : : void teardown() override;
685 : :
686 : : /** Sets the high watermark, i.e. the fraction of occupied entries before growing the hash table is required, to
687 : : * \p percentage. */
688 : 0 : void set_high_watermark(double percentage) override {
689 [ # # # # ]: 0 : if (percentage < 1.0) {
690 : 0 : std::cerr << "warning: using chained collisions the load factor must be in [1,∞), ignore invalid value "
691 : 0 : << percentage << std::endl;
692 : 0 : return;
693 : : }
694 : 0 : high_watermark_percentage_ = percentage;
695 : 0 : update_high_watermark();
696 : 0 : }
697 : : private:
698 : : /** Updates internal high watermark variables according to the currently set high watermark percentage. */
699 : 0 : void update_high_watermark() {
700 : 0 : M_insist(bool(high_watermark_absolute_), "must call `setup()` before");
701 [ # # # # : 0 : auto high_watermark_absolute_new = high_watermark_percentage_ * capacity().make_signed().template to<double>();
# # # # #
# # # ]
702 [ # # # # : 0 : auto high_watermark_absolute_floored = high_watermark_absolute_new.template to<int32_t>().make_unsigned();
# # # # ]
703 [ # # # # : 0 : Wasm_insist(high_watermark_absolute_floored.clone() >= 1U,
# # # # #
# # # # #
# # ]
704 : : "at least one entry must be allowed to insert before growing the table");
705 [ # # # # ]: 0 : *high_watermark_absolute_ = high_watermark_absolute_floored;
706 : 0 : }
707 : :
708 : : /** Creates dummy entry for predication. */
709 : 0 : void create_predication_dummy() {
710 : 0 : M_insist(not predication_dummy_);
711 : 0 : predication_dummy_.emplace(); // since globals cannot be constructed with runtime values
712 [ # # # # ]: 0 : *predication_dummy_ = Module::Allocator().allocate(sizeof(uint32_t), sizeof(uint32_t));
713 [ # # # # : 0 : *predication_dummy_->template to<uint32_t*>() = 0U; // set to nullptr
# # # # ]
714 : 0 : }
715 : :
716 : : public:
717 : : void clear() override;
718 : :
719 : : Ptr<void> compute_bucket(std::vector<SQL_t> key) const override;
720 : :
721 : : entry_t emplace(std::vector<SQL_t> key) override;
722 : : std::pair<entry_t, Boolx1> try_emplace(std::vector<SQL_t> key) override;
723 : :
724 : : std::pair<entry_t, Boolx1> find(std::vector<SQL_t> key, hint_t bucket_hint) override;
725 : :
726 : : void for_each(callback_t Pipeline) const override;
727 : : void for_each_in_equal_range(std::vector<SQL_t> key, callback_t Pipeline, bool predicated,
728 : : hint_t bucket_hint) const override;
729 : :
730 : : entry_t dummy_entry() override;
731 : :
732 : : private:
733 : : /** Returns the bucket address for the key \p key by hashing it. */
734 : : Ptr<void> hash_to_bucket(std::vector<SQL_t> key) const;
735 : :
736 : : /** Inserts an entry into the hash table with key \p key regardless whether it already exists, i.e. duplicates
737 : : * are allowed. Returns a handle to the newly inserted entry which may be used to write the values for this
738 : : * entry. No rehashing of the hash table must be performed, i.e. the hash table must have at least
739 : : * one free entry slot. */
740 : : entry_t emplace_without_rehashing(std::vector<SQL_t> key);
741 : :
742 : : /** Compares the key of the entry at address \p entry with \p key and returns `true` iff they are equal. */
743 : : Boolx1 equal_key(Ptr<void> entry, std::vector<SQL_t> key) const;
744 : :
745 : : /** Inserts the key \p key into the entry at address \p entry. */
746 : : void insert_key(Ptr<void> entry, std::vector<SQL_t> key);
747 : :
748 : : /** Returns a handle for the entry at address \p entry which may be used to write the values of the corresponding
749 : : * entry. */
750 : : entry_t value_entry(Ptr<void> entry) const;
751 : : /** Returns a handle for the entry at address \p entry which may be used to read both the keys and the values of
752 : : * the corresponding entry. */
753 : : const_entry_t entry(Ptr<void> entry) const;
754 : :
755 : : /** Performs rehashing, i.e. resizes the hash table to the double of its capacity (by internally creating a new
756 : : * one) while asserting that all entries are still correctly contained in the resized hash table (by rehashing
757 : : * and reinserting but not reallocating them into the newly created hash table). */
758 : : void rehash();
759 : : };
760 : :
761 : : using LocalChainedHashTable = ChainedHashTable<false>;
762 : : using GlobalChainedHashTable = ChainedHashTable<true>;
763 : :
764 : :
765 : : /*----- open addressing hash tables ----------------------------------------------------------------------------------*/
766 : :
767 : : struct OpenAddressingHashTableBase : HashTable
768 : : {
769 : : friend struct ProbingStrategy;
770 : :
771 : : using ref_t = uint32_t; ///< 4 bytes for reference counting
772 : :
773 : : /** Probing strategy to handle collisions in an open addressing hash table. */
774 : : struct ProbingStrategy
775 : : {
776 : : protected:
777 : : const OpenAddressingHashTableBase &ht_; ///< open addressing hash table which uses `this` probing strategy
778 : :
779 : : public:
780 : 0 : ProbingStrategy(const OpenAddressingHashTableBase &ht) : ht_(ht) { }
781 : :
782 : 0 : virtual ~ProbingStrategy() { }
783 : :
784 : : /** Returns the address of the \p skips -th (starting with index 0) slot in the bucket starting at \p bucket. */
785 : : virtual Ptr<void> skip_slots(Ptr<void> bucket, U32x1 skips) const = 0;
786 : : /** Returns the address of the \p current_step -th slot (starting with index 0) of a bucket which follows the
787 : : * slot \p slot. */
788 : : virtual Ptr<void> advance_to_next_slot(Ptr<void> slot, U32x1 current_step) const = 0;
789 : : };
790 : :
791 : : protected:
792 : : std::unique_ptr<ProbingStrategy> probing_strategy_; ///< probing strategy to handle collisions
793 : : HashTable::offset_t refs_offset_in_bytes_; ///< offset in bytes for reference counter
794 : : HashTable::size_t entry_size_in_bytes_; ///< entry size in bytes
795 : : HashTable::size_t entry_max_alignment_in_bytes_; ///< alignment requirement in bytes of a single entry
796 : 0 : double high_watermark_percentage_ = 1.0; ///< fraction of occupied entries before growing the hash table is required
797 : :
798 : : public:
799 : : /** Creates an open addressing hash table with schema \p schema and keys at \p key_indices. */
800 : 0 : OpenAddressingHashTableBase(const Schema &schema, std::vector<index_t> key_indices)
801 [ # # ]: 0 : : HashTable(schema, std::move(key_indices))
802 : 0 : { }
803 : :
804 : : /** Returns the address of the first entry. */
805 : : virtual Ptr<void> begin() const = 0;
806 : : /** Returns the address of the past-the-end entry. */
807 : : virtual Ptr<void> end() const = 0;
808 : : /** Returns the mask of the hash table, i.e. capacity - 1U, which can be used to mask a hash value into the range
809 : : * of the hash table. */
810 : : virtual U32x1 mask() const = 0;
811 : : /** Returns the capacity of the hash table. */
812 [ # # ]: 0 : U32x1 capacity() const { return mask() + 1U; }
813 : : /** Returns the overall size in bytes of the hash table. */
814 [ # # ]: 0 : U32x1 size_in_bytes() const { return capacity() * entry_size_in_bytes_; }
815 : : /** Returns the size in bytes of a single entry in the hash table. */
816 : 0 : HashTable::size_t entry_size_in_bytes() const { return entry_size_in_bytes_; }
817 : :
818 : : /** Sets the hash table's probing strategy to \tparam T. */
819 : : template<typename T>
820 : 0 : void set_probing_strategy() { probing_strategy_ = std::make_unique<T>(*this); }
821 : : protected:
822 : : /** Returns the currently used probing strategy of the hash table. */
823 : 0 : const ProbingStrategy & probing_strategy() const { M_insist(bool(probing_strategy_)); return *probing_strategy_; }
824 : :
825 : : /** Returns a `Reference` to the reference counter for the entry at address \p entry. */
826 [ # # # # ]: 0 : Reference<ref_t> reference_count(Ptr<void> entry) const { return *(entry + refs_offset_in_bytes_).to<ref_t*>(); }
827 : :
828 : : public:
829 : : /** Sets the high watermark, i.e. the fraction of occupied entries before growing the hash table is required, to
830 : : * \p percentage. */
831 : 0 : void set_high_watermark(double percentage) override {
832 [ # # # # ]: 0 : if (percentage < 0.5 or percentage >= 1.0) {
833 : 0 : std::cerr << "warning: using open addressing the load factor must be in [0.5,1), ignore invalid value "
834 : 0 : << percentage << std::endl;
835 : 0 : return;
836 : : }
837 : 0 : high_watermark_percentage_ = percentage;
838 : 0 : update_high_watermark();
839 : 0 : }
840 : : protected:
841 : : /** Updates internal high watermark variables according to the currently set high watermark percentage. */
842 : 0 : virtual void update_high_watermark() { }
843 : :
844 : : public:
845 : : void clear() override;
846 : :
847 : : protected:
848 : : /** Returns the bucket address for the key \p key by hashing it. */
849 : : Ptr<void> hash_to_bucket(std::vector<SQL_t> key) const;
850 : : };
851 : :
852 : : template<bool ValueInPlace>
853 : : class open_addressing_hash_table_layout;
854 : :
855 : : template<>
856 : : class open_addressing_hash_table_layout<true>
857 : : {
858 : : friend struct OpenAddressingHashTable<false, true>;
859 : : friend struct OpenAddressingHashTable<true, true>;
860 : :
861 : : std::vector<HashTable::offset_t> entry_offsets_in_bytes_;
862 : : HashTable::offset_t null_bitmap_offset_in_bytes_; ///< only specified if at least one entry is nullable
863 : : };
864 : :
865 : : template<>
866 : : class open_addressing_hash_table_layout<false>
867 : : {
868 : : friend struct OpenAddressingHashTable<false, false>;
869 : : friend struct OpenAddressingHashTable<true, false>;
870 : :
871 : : std::vector<HashTable::offset_t> key_offsets_in_bytes_;
872 : : std::vector<HashTable::offset_t> value_offsets_in_bytes_;
873 : : HashTable::offset_t ptr_offset_in_bytes_; ///< pointer to out-of-place values
874 : : HashTable::size_t values_size_in_bytes_;
875 : : HashTable::size_t values_max_alignment_in_bytes_;
876 : : HashTable::offset_t keys_null_bitmap_offset_in_bytes_; ///< only specified if at least one key entry is nullable
877 : : HashTable::offset_t values_null_bitmap_offset_in_bytes_; ///< only specified if at least one value entry is nullable
878 : : };
879 : :
880 : : template<bool IsGlobal>
881 : : class open_addressing_hash_table_storage;
882 : :
883 : : template<>
884 : : class open_addressing_hash_table_storage<false> {};
885 : :
886 : : template<>
887 [ # # # # ]: 0 : class open_addressing_hash_table_storage<true>
888 : : {
889 : : friend struct OpenAddressingHashTable<true, false>;
890 : : friend struct OpenAddressingHashTable<true, true>;
891 : :
892 : : Global<Ptr<void>> address_; ///< global backup for address of hash table
893 : : Global<U32x1> mask_; ///< global backup for mask of hash table
894 : : Global<U32x1> num_entries_; ///< global backup for number of occupied entries of hash table
895 : : Global<U32x1> high_watermark_absolute_; ///< global backup for absolute high watermark of hash table
896 : : };
897 : :
898 : : template<bool IsGlobal, bool ValueInPlace>
899 : : struct OpenAddressingHashTable : OpenAddressingHashTableBase
900 : : {
901 : : private:
902 : : ///> variable type dependent on whether the hash table should be globally usable
903 : : template<typename T>
904 : : using var_t = std::conditional_t<IsGlobal, Global<T>, Var<T>>;
905 : :
906 : : open_addressing_hash_table_layout<ValueInPlace> layout_; ///< layout of hash table
907 : : std::optional<Var<Ptr<void>>> address_; ///< base address of hash table
908 : : std::optional<Var<U32x1>> mask_; ///< mask of hash table; always a power of 2 minus 1, i.e. 0b0..01..1
909 : : std::optional<Var<U32x1>> num_entries_; ///< number of occupied entries of hash table
910 : : std::optional<Var<U32x1>> high_watermark_absolute_; ///< maximum number of entries before growing the hash table is required
911 : : ///> if `IsGlobal`, contains backups for address, capacity, number of entries, and absolute high watermark
912 : : open_addressing_hash_table_storage<IsGlobal> storage_;
913 : : ///> function to perform rehashing; only possible for global hash tables since variables have to be updated
914 : : std::optional<FunctionProxy<void(void)>> rehash_;
915 : : std::vector<std::pair<Ptr<void>, U32x1>> dummy_allocations_; ///< address-size pairs of dummy entry allocations
916 : : std::optional<var_t<Ptr<void>>> predication_dummy_; ///< dummy entry used for predication
917 : :
918 : : public:
919 : : /** Creates an open addressing hash table with schema \p schema, keys at \p key_indices, and an initial capacity
920 : : * for \p initial_capacity entries. Emits code to allocate a fresh hash table. The hash table is globally visible
921 : : * iff \tparam IsGlobal and the values are stores in-place iff \tparam ValueInPlace. */
922 : : OpenAddressingHashTable(const Schema &schema, std::vector<HashTable::index_t> key_indices,
923 : : uint32_t initial_capacity);
924 : :
925 : : OpenAddressingHashTable(OpenAddressingHashTable&&) = default;
926 : :
927 : : ~OpenAddressingHashTable();
928 : :
929 : : private:
930 : 0 : Ptr<void> begin() const override { M_insist(bool(address_), "must call `setup()` before"); return *address_; }
931 [ # # # # : 0 : Ptr<void> end() const override { return begin() + (capacity() * entry_size_in_bytes_).make_signed(); }
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
932 : 0 : U32x1 mask() const override { M_insist(bool(mask_), "must call `setup()` before"); return *mask_; }
933 : :
934 : : public:
935 : : /** Performs the setup of all local variables of the hash table (by reading them from the global backups iff
936 : : * \tparam IsGlobal). Must be called before any call to a setup method, i.e. setting the high watermark, or an
937 : : * access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
938 : : void setup() override;
939 : : /** Performs the teardown of all local variables of the hash table (by storing them into the global backups iff
940 : : * \tparam IsGlobal). Must be called after all calls to a setup method, i.e. setting the high watermark, or an
941 : : * access method, i.e. clearing, insertion, lookup, or dummy entry creation. */
942 : : void teardown() override;
943 : :
944 : : private:
945 : 0 : void update_high_watermark() override {
946 : 0 : M_insist(bool(high_watermark_absolute_), "must call `setup()` before");
947 [ # # # # : 0 : auto _capacity = capacity().make_signed().template to<double>();
# # # # #
# # # # #
# # ]
948 [ # # # # : 0 : const Var<U32x1> high_watermark_absolute_new(
# # # # ]
949 [ # # # # : 0 : (high_watermark_percentage_ * _capacity).ceil().template to<int32_t>().make_unsigned()
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
950 : : );
951 [ # # # # : 0 : *high_watermark_absolute_ =
# # # # ]
952 [ # # # # : 0 : Select(capacity() <= high_watermark_absolute_new, capacity() - 1U, high_watermark_absolute_new);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
953 [ # # # # : 0 : Wasm_insist(*high_watermark_absolute_ >= 1U,
# # # # #
# # # # #
# # # # #
# # # #
# ]
954 : : "at least one entry must be allowed to insert before growing the table");
955 [ # # # # : 0 : Wasm_insist(*high_watermark_absolute_ < capacity(), "at least one entry must always be unoccupied for lookups");
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
956 : 0 : }
957 : :
958 : : /** Creates dummy entry for predication. */
959 : 0 : void create_predication_dummy() {
960 : 0 : M_insist(not predication_dummy_);
961 : 0 : predication_dummy_.emplace(); // since globals cannot be constructed with runtime values
962 [ # # # # : 0 : *predication_dummy_ = Module::Allocator().allocate(entry_size_in_bytes_, entry_max_alignment_in_bytes_);
# # # # ]
963 : 0 : dummy_allocations_.emplace_back(*predication_dummy_, entry_size_in_bytes_);
964 [ # # # # : 0 : reference_count(*predication_dummy_) = ref_t(0); // set unoccupied
# # # # #
# # # # #
# # ]
965 : 0 : }
966 : :
967 : : public:
968 : : Ptr<void> compute_bucket(std::vector<SQL_t> key) const override;
969 : :
970 : : entry_t emplace(std::vector<SQL_t> key) override;
971 : : std::pair<entry_t, Boolx1> try_emplace(std::vector<SQL_t> key) override;
972 : :
973 : : std::pair<entry_t, Boolx1> find(std::vector<SQL_t> key, hint_t bucket_hint) override;
974 : :
975 : : void for_each(callback_t Pipeline) const override;
976 : : void for_each_in_equal_range(std::vector<SQL_t> key, callback_t Pipeline, bool predicated,
977 : : hint_t bucket_hint) const override;
978 : :
979 : : entry_t dummy_entry() override;
980 : :
981 : : private:
982 : : /** Inserts an entry into the hash table with key \p key regardless whether it already exists, i.e. duplicates
983 : : * are allowed. Returns a pointer to the newly inserted slot without allocating any space for possible
984 : : * out-of-place values. No rehashing of the hash table must be performed, i.e. the hash table must have at least
985 : : * one free entry slot. */
986 : : Ptr<void> emplace_without_rehashing(std::vector<SQL_t> key);
987 : :
988 : : /** Compares the key of the slot at address \p slot with \p key and returns `true` iff they are equal. */
989 : : Boolx1 equal_key(Ptr<void> slot, std::vector<SQL_t> key) const;
990 : :
991 : : /** Inserts the key \p key into the slot at address \p slot. */
992 : : void insert_key(Ptr<void> slot, std::vector<SQL_t> key);
993 : :
994 : : /** Returns a handle which may be used to write the values of the corresponding entry. If \tparam ValueInPlace,
995 : : * the given pointer \p ptr is interpreted as slot pointer, otherwise, it is interpreted as pointer to the
996 : : * out-of-place values. */
997 : : entry_t value_entry(Ptr<void> ptr) const;
998 : : /** Returns a handle for the slot at address \p slot which may be used to read both the keys and the values of
999 : : * the corresponding entry. */
1000 : : const_entry_t entry(Ptr<void> slot) const;
1001 : :
1002 : : /** Performs rehashing, i.e. resizes the hash table to the double of its capacity (by internally creating a new
1003 : : * one) while asserting that all entries are still correctly contained in the resized hash table (by rehashing
1004 : : * and reinserting them into the newly created hash table). */
1005 : : void rehash();
1006 : : };
1007 : :
1008 : : using LocalOpenAddressingOutOfPlaceHashTable = OpenAddressingHashTable<false, false>;
1009 : : using GlobalOpenAddressingOutOfPlaceHashTable = OpenAddressingHashTable<true, false>;
1010 : : using LocalOpenAddressingInPlaceHashTable = OpenAddressingHashTable<false, true>;
1011 : : using GlobalOpenAddressingInPlaceHashTable = OpenAddressingHashTable<true, true>;
1012 : :
1013 : :
1014 : : /*----- probing strategies for open addressing hash tables -----------------------------------------------------------*/
1015 : :
1016 : : /** Linear probing strategy, i.e. always the following slot in a bucket is accessed. */
1017 : : struct LinearProbing : OpenAddressingHashTableBase::ProbingStrategy
1018 : : {
1019 : 0 : LinearProbing(const OpenAddressingHashTableBase &ht) : OpenAddressingHashTableBase::ProbingStrategy(ht) { }
1020 : :
1021 : : Ptr<void> skip_slots(Ptr<void> bucket, U32x1 skips) const override;
1022 : : Ptr<void> advance_to_next_slot(Ptr<void> slot, U32x1 current_step) const override;
1023 : : };
1024 : :
1025 : : /** Quadratic probing strategy, i.e. at each step i, the slot to access next in a bucket is computed by skipping i
1026 : : * slots, e.g. the thirdly accessed slot in a bucket is slot number 1+2+3=6. */
1027 : : struct QuadraticProbing : OpenAddressingHashTableBase::ProbingStrategy
1028 : : {
1029 : 0 : QuadraticProbing(const OpenAddressingHashTableBase &ht) : OpenAddressingHashTableBase::ProbingStrategy(ht) { }
1030 : :
1031 : : Ptr<void> skip_slots(Ptr<void> bucket, U32x1 skips) const override;
1032 : : Ptr<void> advance_to_next_slot(Ptr<void> slot, U32x1 current_step) const override;
1033 : : };
1034 : :
1035 : :
1036 : : /*======================================================================================================================
1037 : : * explicit instantiation declarations
1038 : : *====================================================================================================================*/
1039 : :
1040 : : extern template void quicksort<false>(GlobalBuffer&, const std::vector<SortingOperator::order_type>&);
1041 : : extern template void quicksort<true>(GlobalBuffer&, const std::vector<SortingOperator::order_type>&);
1042 : : extern template struct m::wasm::ChainedHashTable<false>;
1043 : : extern template struct m::wasm::ChainedHashTable<true>;
1044 : : extern template struct m::wasm::OpenAddressingHashTable<false, false>;
1045 : : extern template struct m::wasm::OpenAddressingHashTable<false, true>;
1046 : : extern template struct m::wasm::OpenAddressingHashTable<true, false>;
1047 : : extern template struct m::wasm::OpenAddressingHashTable<true, true>;
1048 : :
1049 : : }
1050 : :
1051 : : }
|