Branch data Line data Source code
1 : : #pragma once
2 : :
3 : : #include "storage/ColumnStore.hpp"
4 : : #include <mutable/catalog/Schema.hpp>
5 : : #include <random>
6 : : #include <utility>
7 : : #include <vector>
8 : :
9 : :
10 : : namespace m {
11 : :
12 : : /** Sets all attributes of the rows from `begin` (inclusive) to `end` (exclusive) of the column at address
13 : : * `column_ptr` to NULL.  NOTE(review): `num_attrs` is presumably the number of attributes per row -- confirm. */
14 : : void set_all_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end);
15 : :
16 : : /** Sets all attributes of the rows from `begin` (inclusive) to `end` (exclusive) of the column at address
17 : : * `column_ptr` to NOT NULL.  NOTE(review): `num_attrs` is presumably the number of attributes per row -- confirm. */
18 : : void set_all_not_null(uint8_t *column_ptr, std::size_t num_attrs, std::size_t begin, std::size_t end);
19 : :
20 : : /** Generates primary keys of `Type` `type` for the rows from `begin` (inclusive) to `end` (exclusive) of the
21 : : * column at address `column_ptr`. */
22 : : void generate_primary_keys(void *column_ptr, const Type &type, std::size_t begin, std::size_t end);
23 : :
24 : : /** Fills column at address `column_ptr` from `begin`-th row (including) to `end`-th row (excluding) with data from
25 : : * `values`. Initially, shuffles `values` with `g` as URBG. Then, uses each value of `values` exactly once. After
26 : : * all elements of `values` were used exactly once, shuffles `values` again with `g` as URBG and repeats. */
27 : : template<typename T, typename Generator = std::mt19937_64>
28 : : std::enable_if_t<std::is_arithmetic_v<T>, void>
29 : : M_EXPORT
30 : 1 : fill_uniform(T *column_ptr, std::vector<T> values, std::size_t begin, std::size_t end, Generator &&g = Generator()) {
31 : 1 : M_insist(begin < end, "must set at least one row");
32 : :
33 : 1 : const auto count = end - begin;
34 : :
35 : : /* Write distinct values repeatedly in arbitrary order to column. */
36 : 1 : auto ptr = column_ptr + begin;
37 [ # # # # : 26 : for (std::size_t i = 0; i != count; ) {
- + # # #
# # # ]
38 : : /* Shuffle the vector before writing its values to the column. */
39 : 26 : std::shuffle(values.begin(), values.end(), std::forward<Generator>(g));
40 [ # # # # : 282 : for (auto v : values) {
+ + # # #
# # # ]
41 : 257 : *ptr++ = v;
42 : 257 : ++i;
43 [ # # # # : 257 : if (i == count)
+ + # # #
# # # ]
44 : 1 : goto exit;
45 : : }
46 : 0 : }
47 : : exit:
48 : 1 : M_insist(ptr - column_ptr == long(count), "incorrect number of elements written");
49 : 1 : }
50 : :
51 : : /** Generates data for the column at address `column_ptr` from the `begin`-th row (inclusive) to the `end`-th row
52 : : * (exclusive) and writes it directly to memory. The rows must have been allocated before calling this function. */
53 : : void generate_column_data(void *column_ptr, const Attribute &attr, std::size_t num_distinct_values, std::size_t begin,
54 : : std::size_t end);
55 : :
56 : : /** Generates data for the two columns at addresses `left_ptr` and `right_ptr`, correlated by
57 : : * `num_distinct_values_matching`, and writes the data directly to memory. The rows must have been allocated before calling this function.  NOTE(review): `num_distinct_values_matching` is presumably the number of distinct values shared by both columns -- confirm. */
58 : : void generate_correlated_column_data(void *left_ptr, void *right_ptr, const Attribute &attr,
59 : : std::size_t num_distinct_values_left, std::size_t num_distinct_values_right,
60 : : std::size_t count_left, std::size_t count_right,
61 : : std::size_t num_distinct_values_matching);
62 : :
63 : : }
|