LCOV - code coverage report
Current view: top level - src/storage - DataLayoutFactory.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 124 130 95.4 %
Date: 2025-03-25 01:19:55 Functions: 7 11 63.6 %
Branches: 89 154 57.8 %

           Branch data     Line data    Source code
       1                 :            : #include <mutable/storage/DataLayoutFactory.hpp>
       2                 :            : 
       3                 :            : #include <memory>
       4                 :            : #include <mutable/catalog/Catalog.hpp>
       5                 :            : #include <mutable/catalog/Type.hpp>
       6                 :            : #include <numeric>
       7                 :            : 
       8                 :            : 
       9                 :            : using namespace m;
      10                 :            : using namespace m::storage;
      11                 :            : 
      12                 :            : M_LCOV_EXCL_START
      13                 :            : std::ostream & m::storage::operator<<(std::ostream &out, const DataLayoutFactory &factory)
      14                 :            : {
      15                 :            :     factory.print(out);
      16                 :            :     return out;
      17                 :            : }
      18                 :            : void DataLayoutFactory::dump(std::ostream &out) const { out << *this << std::endl; }
      19                 :            : void DataLayoutFactory::dump() const { dump(std::cerr); }
      20                 :            : M_LCOV_EXCL_STOP
      21                 :            : 
      22                 :            : namespace {
      23                 :            : 
      24                 :            : namespace options {
      25                 :            : 
      26                 :            : /** Whether to reorder attributes when creating data layouts. */
      27                 :            : bool attribute_reordering = true;
      28                 :            : 
      29                 :            : /** Whether to remove the NULL bitmap in all created data layouts. */
      30                 :            : bool remove_null_bitmap = false;
      31                 :            : 
      32                 :            : /** Whether to pack one tuple less than theoretically possible in a created PAX data layout. */
      33                 :            : bool pax_pack_one_tuple_less = false;
      34                 :            : 
      35                 :            : }
      36                 :            : 
      37                 :            : __attribute__((constructor(201)))
      38                 :          1 : static void add_storage_args()
      39                 :            : {
      40                 :          1 :     Catalog &C = Catalog::Get();
      41                 :            : 
      42                 :            :     /*----- Command-line arguments -----*/
      43                 :          1 :     C.arg_parser().add<bool>(
      44                 :            :         /* group=       */ "Storage",
      45                 :            :         /* short=       */ nullptr,
      46                 :            :         /* long=        */ "--no-attribute-reordering",
      47                 :            :         /* description= */ "do not reorder attributes when creating data layouts, e.g. to minimize padding",
      48                 :          0 :         /* callback=    */ [](bool){ options::attribute_reordering = false; }
      49                 :            :     );
      50                 :          1 :     C.arg_parser().add<bool>(
      51                 :            :         /* group=       */ "Storage",
      52                 :            :         /* short=       */ nullptr,
      53                 :            :         /* long=        */ "--remove-null-bitmap",
      54                 :            :         /* description= */ "remove the NULL bitmap in all created data layouts",
      55                 :          0 :         /* callback=    */ [](bool){ options::remove_null_bitmap = true; }
      56                 :            :     );
      57                 :          1 :     C.arg_parser().add<bool>(
      58                 :            :         /* group=       */ "Storage",
      59                 :            :         /* short=       */ nullptr,
      60                 :            :         /* long=        */ "--pax-pack-one-tuple-less",
      61                 :            :         /* description= */ "pack one tuple less than possible into PAX blocks; only used for benchmarking purposes",
      62                 :          0 :         /* callback=    */ [](bool){ options::pax_pack_one_tuple_less = true; }
      63                 :            :     );
      64                 :          1 : }
      65                 :            : 
      66                 :            : }
      67                 :          0 : 
      68                 :            : 
      69                 :            : /** Computes the order for attributes of types \p types and returns this permutation as array of indices.  Attributes
      70                 :            :  * are reordered by their alignment requirement to minimize padding except the CLI option `--no-attribute-reordering`
      71                 :            :  * is set. */
      72                 :            : std::unique_ptr<std::size_t[]>
      73                 :        338 : compute_attribute_order(const std::vector<const Type*> &types)
      74                 :          1 : {
      75                 :            :     /*----- Collect all indices. -----*/
      76                 :        338 :     auto indices = std::make_unique<std::size_t[]>(types.size());
      77         [ +  - ]:        338 :     std::iota(indices.get(), indices.get() + types.size(), 0);
      78                 :            : 
      79         [ +  - ]:        338 :     if (options::attribute_reordering) {
      80                 :            :         /*----- Sort indices by alignment. -----*/
      81         [ +  - ]:       1416 :         std::stable_sort(indices.get(), indices.get() + types.size(), [&](std::size_t left, std::size_t right) {
      82                 :       1078 :             return types[left]->alignment() > types[right]->alignment();
      83                 :            :         });
      84                 :        338 :     }
      85                 :            : 
      86                 :        338 :     return indices;
      87         [ +  - ]:        338 : }
      88                 :            : 
      89                 :         17 : DataLayout RowLayoutFactory::make(std::vector<const Type*> types, std::size_t num_tuples) const
      90                 :            : {
      91                 :         17 :     M_insist(not types.empty(), "cannot make layout for zero types");
      92                 :            : 
      93                 :         17 :     auto indices = compute_attribute_order(types);
      94                 :         17 :     uint64_t offsets[types.size()]; // in bits
      95                 :            : 
      96                 :            :     /*----- Compute offsets. -----*/
      97                 :         17 :     uint64_t offset_in_bits = 0;
      98                 :         17 :     uint64_t alignment_in_bits = 8;
      99                 :            : 
     100         [ +  + ]:        107 :     for (std::size_t idx = 0; idx != types.size(); ++idx) {
     101         [ +  - ]:         90 :         const auto mapped_idx = indices[idx];
     102                 :         90 :         offsets[mapped_idx] = offset_in_bits;
     103         [ +  - ]:         90 :         offset_in_bits += types[mapped_idx]->size();
     104   [ +  -  +  - ]:         90 :         alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
     105                 :         90 :     }
     106                 :            : 
     107                 :         17 :     const uint64_t null_bitmap_offset = offset_in_bits;
     108                 :            : 
     109                 :            :     /*----- Compute row size with padding. -----*/
     110         [ -  + ]:         17 :     if (not options::remove_null_bitmap)
     111                 :         17 :         offset_in_bits += types.size(); // space for NULL bitmap
     112         [ +  + ]:         17 :     if (uint64_t rem = offset_in_bits % alignment_in_bits; rem)
     113                 :         15 :         offset_in_bits += alignment_in_bits - rem;
     114                 :         17 :     const uint64_t row_size_in_bits = offset_in_bits;
     115                 :            : 
     116                 :            :     /*----- Construct DataLayout. -----*/
     117         [ +  - ]:         17 :     DataLayout layout(num_tuples);
     118         [ +  - ]:         17 :     auto &row = layout.add_inode(1, row_size_in_bits);
     119         [ +  + ]:        107 :     for (std::size_t idx = 0; idx != types.size(); ++idx)
     120         [ +  - ]:         90 :         row.add_leaf(types[idx], idx, offsets[idx], 0); // add attribute
     121         [ +  - ]:         17 :     if (not options::remove_null_bitmap) {
     122         [ +  - ]:         34 :         row.add_leaf( // add NULL bitmap
     123   [ +  -  +  - ]:         17 :             /* type=           */ Type::Get_Bitmap(Type::TY_Vector, types.size()),
     124                 :         17 :             /* idx=            */ types.size(),
     125                 :         17 :             /* offset_in_bits= */ null_bitmap_offset,
     126                 :            :             /* stride_in_bits= */ 0
     127                 :            :         );
     128                 :         17 :     }
     129                 :            : 
     130                 :         17 :     return layout;
     131         [ +  - ]:         17 : }
     132                 :            : 
     133                 :        321 : DataLayout PAXLayoutFactory::make(std::vector<const Type*> types, std::size_t num_tuples) const
     134                 :            : {
     135                 :        321 :     M_insist(not types.empty(), "cannot make layout for zero types");
     136                 :            : 
     137                 :        321 :     auto indices = compute_attribute_order(types);
     138                 :        321 :     uint64_t offsets[types.size() + 1]; // in bits
     139                 :            : 
     140                 :            :     /*----- Compute attribute offsets in a virtual row. -----*/
     141                 :        321 :     uint64_t offset_in_bits = 0;
     142                 :        321 :     uint64_t min_size_in_bytes = std::numeric_limits<uint64_t>::max();
     143                 :        321 :     uint64_t alignment_in_bits = 8;
     144                 :        321 :     std::size_t num_not_byte_aligned = 0;
     145                 :            : 
     146         [ +  + ]:       1137 :     for (std::size_t idx = 0; idx != types.size(); ++idx) {
     147         [ +  - ]:        816 :         const auto mapped_idx = indices[idx];
     148                 :        816 :         offsets[mapped_idx] = offset_in_bits;
     149         [ +  - ]:        816 :         offset_in_bits += types[mapped_idx]->size();
     150   [ +  -  +  - ]:        816 :         min_size_in_bytes = std::min(min_size_in_bytes, (types[mapped_idx]->size() + 7) / 8);
     151   [ +  -  +  - ]:        816 :         alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
     152   [ +  -  +  + ]:        816 :         if (types[mapped_idx]->size() % 8)
     153                 :         19 :             ++num_not_byte_aligned;
     154                 :        816 :     }
     155                 :            : 
     156                 :            :     /*----- Compute NULL bitmap offset in a virtual row. -----*/
     157                 :        321 :     const uint64_t null_bitmap_size_in_bits =
     158   [ +  -  +  -  :        321 :         options::remove_null_bitmap ? 0 : std::max(ceil_to_pow_2(types.size()), 8UL); // add padding to support SIMDfication
                   +  - ]
     159                 :        321 :     offsets[types.size()] = offset_in_bits;
     160         [ +  - ]:        321 :     if (null_bitmap_size_in_bits % 8)
     161                 :          0 :         ++num_not_byte_aligned;
     162                 :            : 
     163                 :            :     /*----- Compute number of rows per block and number of blocks per row. -----*/
     164         [ +  - ]:        321 :     const auto num_simd_lanes = std::max<std::size_t>(1, 16 / min_size_in_bytes); // possible number of SIMD lanes
     165                 :            :     std::size_t num_rows_per_block, num_blocks_per_row;
     166         [ +  + ]:        321 :     if (NTuples == option_) {
     167                 :         25 :         num_rows_per_block = num_tuples_;
     168         [ +  + ]:         25 :         if (num_rows_per_block > num_simd_lanes)
     169                 :         22 :             num_rows_per_block =
     170                 :         22 :                 (num_tuples_ / num_simd_lanes) * num_simd_lanes; // floor to multiple of possible number of SIMD lanes
     171                 :         25 :         num_blocks_per_row = 1;
     172                 :         25 :     } else {
     173                 :        296 :         const uint64_t row_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits; // space for NULL bitmap
     174                 :            :         /* Compute number of rows within a PAX block. Consider worst case padding of 7 bits (because each column within
     175                 :            :          * a PAX block must be byte aligned) for every possibly not byte-aligned attribute column. Null bitmap column is
     176                 :            :          * ignored since it is the last column. */
     177         [ +  - ]:        296 :         num_rows_per_block = std::max<std::size_t>(1, (num_bytes_ * 8 - num_not_byte_aligned * 7) / row_size_in_bits);
     178         [ +  + ]:        296 :         if (num_rows_per_block > num_simd_lanes)
     179                 :        295 :             num_rows_per_block =
     180                 :        295 :                 (num_rows_per_block / num_simd_lanes) * num_simd_lanes; // floor to multiple of possible number of SIMD lanes
     181   [ -  +  #  # ]:        296 :         if (options::pax_pack_one_tuple_less and num_rows_per_block > 1)
     182                 :          0 :             --num_rows_per_block;
     183                 :        296 :         num_blocks_per_row = (row_size_in_bits + num_bytes_ * 8 - 1UL) / (num_bytes_ * 8);
     184                 :            :     }
     185                 :            : 
     186                 :            :     /*----- Compute column offsets. -----*/
     187                 :        321 :     uint64_t running_padding = 0;
     188         [ +  + ]:       1137 :     for (std::size_t idx = 0; idx != types.size(); ++idx) {
     189         [ +  - ]:        816 :         const auto mapped_idx = indices[idx];
     190                 :        816 :         offsets[mapped_idx] = offsets[mapped_idx] * num_rows_per_block + running_padding;
     191         [ +  - ]:        816 :         M_insist(offsets[mapped_idx] % 8 == 0, "attribute column must be byte aligned");
     192   [ +  -  +  + ]:        816 :         if (uint64_t bit_offset = (types[mapped_idx]->size() * num_rows_per_block) % 8; bit_offset)
     193                 :          6 :             running_padding += 8UL - bit_offset;
     194                 :        816 :     }
     195                 :        321 :     offsets[types.size()] = offsets[types.size()] * num_rows_per_block + running_padding;
     196                 :            : 
     197                 :            :     /*----- Compute block size. -----*/
     198                 :            :     uint64_t block_size_in_bits;
     199         [ +  + ]:        321 :     if (NTuples == option_) {
     200                 :         25 :         block_size_in_bits = offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block;
     201         [ +  + ]:         25 :         if (uint64_t alignment_offset = block_size_in_bits % alignment_in_bits)
     202                 :          3 :             block_size_in_bits += alignment_in_bits - alignment_offset;
     203                 :         25 :     } else {
     204                 :        296 :         block_size_in_bits = num_bytes_ * 8;
     205                 :            :     }
     206                 :            : 
     207         [ +  - ]:        321 :     M_insist(offsets[types.size()] % 8 == 0, "NULL bitmap column must be byte aligned");
     208         [ +  - ]:        321 :     M_insist(offsets[types.size()] + null_bitmap_size_in_bits * num_rows_per_block <=
     209                 :            :              block_size_in_bits * num_blocks_per_row,
     210                 :            :              "computed block layout must not exceed block size");
     211                 :            : 
     212                 :            :     /*----- Construct DataLayout. -----*/
     213         [ +  - ]:        321 :     DataLayout layout(num_tuples);
     214         [ +  - ]:        321 :     auto &pax_block = layout.add_inode(num_rows_per_block, num_blocks_per_row * block_size_in_bits);
     215         [ +  + ]:       1137 :     for (std::size_t idx = 0; idx != types.size(); ++idx)
     216   [ +  -  +  - ]:        816 :         pax_block.add_leaf(types[idx], idx, offsets[idx], types[idx]->size());
     217         [ +  - ]:        321 :     if (not options::remove_null_bitmap) {
     218         [ +  - ]:        642 :         pax_block.add_leaf( // add NULL bitmap
     219   [ +  -  +  - ]:        321 :             /* type=           */ Type::Get_Bitmap(Type::TY_Vector, types.size()),
     220                 :        321 :             /* idx=            */ types.size(),
     221                 :        321 :             /* offset_in_bits= */ offsets[types.size()],
     222                 :        321 :             /* stride_in_bits= */ null_bitmap_size_in_bits
     223                 :            :         );
     224                 :        321 :     }
     225                 :            : 
     226                 :        321 :     return layout;
     227         [ +  - ]:        321 : }
     228                 :            : 
     229                 :            : __attribute__((constructor(202)))
     230                 :          1 : static void register_data_layouts()
     231                 :            : {
     232                 :          1 :     Catalog &C = Catalog::Get();
     233                 :            : #define REGISTER_PAX_BYTES(NAME, BLOCK_SIZE, DESCRIPTION) \
     234                 :            :     C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NBytes, BLOCK_SIZE), DESCRIPTION)
     235                 :            : #define REGISTER_PAX_TUPLES(NAME, BLOCK_SIZE, DESCRIPTION) \
     236                 :            :     C.register_data_layout(C.pool(#NAME), std::make_unique<PAXLayoutFactory>(PAXLayoutFactory::NTuples, BLOCK_SIZE), DESCRIPTION)
     237   [ +  -  +  - ]:          1 :     REGISTER_PAX_BYTES(PAX4M, 1UL << 22, "stores attributes using PAX layout with 4MiB blocks"); // default
     238   [ +  -  +  - ]:          1 :     REGISTER_PAX_BYTES(PAX4K, 1UL << 12, "stores attributes using PAX layout with 4KiB blocks");
     239   [ +  -  +  - ]:          1 :     REGISTER_PAX_BYTES(PAX64K, 1UL << 16, "stores attributes using PAX layout with 64KiB blocks");
     240   [ +  -  +  - ]:          1 :     REGISTER_PAX_BYTES(PAX512K, 1UL << 19, "stores attributes using PAX layout with 512KiB blocks");
     241   [ +  -  +  - ]:          1 :     REGISTER_PAX_BYTES(PAX64M, 1UL << 26, "stores attributes using PAX layout with 64MiB blocks");
     242   [ +  -  +  - ]:          1 :     REGISTER_PAX_TUPLES(PAX16Tup, 16, "stores attributes using PAX layout with blocks for 16 tuples");
     243   [ +  -  +  - ]:          1 :     REGISTER_PAX_TUPLES(PAX128Tup, 128, "stores attributes using PAX layout with blocks for 128 tuples");
     244   [ +  -  +  - ]:          1 :     REGISTER_PAX_TUPLES(PAX1024Tup, 1024, "stores attributes using PAX layout with blocks for 1024 tuples");
     245   [ +  -  -  + ]:          1 :     C.register_data_layout(C.pool("Row"), std::make_unique<RowLayoutFactory>(), "stores attributes in row-major order");
     246                 :            : #undef REGISTER_PAX
     247                 :          1 : }

Generated by: LCOV version 1.16