![]() |
mutable
A Database System for Research and Fast Prototyping
|
#include <mutable/catalog/CostModel.hpp>
#include "storage/ColumnStore.hpp"
#include "storage/store_manip.hpp"
#include "util/GridSearch.hpp"
#include "util/stream.hpp"
#include <mutable/catalog/TrainedCostFunction.hpp>
#include <mutable/mutable.hpp>
#include <type_traits>
Go to the source code of this file.
Macros | |
#define | DEFINE(TYPE) |
Functions | |
uint64_t | get_column_offset_in_bytes (const DataLayout &layout, std::size_t idx) |
Returns the offset in bytes of the idx -th column in the DataLayout layout which is considered to represent a PAX-layout. | |
void | save_csv (const std::string &csv_path, const Eigen::MatrixXd &matrix, const std::string &header="") |
Save matrix in a csv file. | |
Timer::duration | time_select_query_execution (Database &DB, const std::string &input) |
Executes the given query and returns the median of query execution times for all SELECT queries. | |
template<typename T > | |
std::pair< Eigen::MatrixXd, Eigen::VectorXd > | generate_training_suite_filter () |
Generates data for training filter models. | |
template<typename T > | |
std::pair< Eigen::MatrixXd, Eigen::VectorXd > | generate_training_suite_group_by () |
Generates data for training group-by models. | |
template<typename T > | |
std::pair< Eigen::MatrixXd, Eigen::VectorXd > | generate_training_suite_join () |
Generates data for training join models. | |
DEFINE (int8_t) | |
DEFINE (int16_t) | |
DEFINE (int32_t) | |
DEFINE (int64_t) | |
DEFINE (float) | |
DEFINE (double) | |
Variables | |
static constexpr std::size_t | NUM_DISTINCT_VALUES_IN_FILTER_EXPERIMENT = 100 |
static constexpr unsigned | DEFAULT_FILTER_POLYNOMIAL_DEGREE = 9 |
constexpr unsigned | NUM_REPETITIONS = 5 |
The number of times a benchmark should be repeated to reduce noise in the data. | |
#define DEFINE | ( | TYPE | ) |
Definition at line 598 of file CostModel.cpp.
DEFINE | ( | double | ) |
DEFINE | ( | float | ) |
DEFINE | ( | int16_t | ) |
DEFINE | ( | int32_t | ) |
DEFINE | ( | int64_t | ) |
DEFINE | ( | int8_t | ) |
std::pair< Eigen::MatrixXd, Eigen::VectorXd > generate_training_suite_filter | ( | ) |
Generates data for training filter models.
Returns a pair of matrices for the features and the target.
Definition at line 93 of file CostModel.cpp.
References m::Catalog::add_database(), m::Database::add_table(), m::Catalog::create_store(), m::gs::LinearSpace< T >::delta(), m::Catalog::drop_database(), m::fill_uniform(), m::generate_primary_keys(), m::Catalog::Get(), get_column_offset_in_bytes(), m::Type::Get_Integer(), m::gs::LinearSpace< T >::hi(), m::gs::LinearSpace< T >::lo(), M_insist, m::storage::PAXLayoutFactory::NTuples, NUM_DISTINCT_VALUES_IN_FILTER_EXPERIMENT, m::gs::GridSearch< Spaces >::num_points(), m::Catalog::pool(), m::Table::push_back(), m::gs::LinearSpace< T >::sequence(), m::set_all_not_null(), m::T(), time_select_query_execution(), and m::Catalog::unset_database_in_use().
std::pair< Eigen::MatrixXd, Eigen::VectorXd > generate_training_suite_group_by | ( | ) |
Generates data for training group-by models.
Returns a pair of matrices for the features and the target.
Definition at line 190 of file CostModel.cpp.
References m::Catalog::add_database(), m::Database::add_table(), m::Catalog::create_store(), m::fill_uniform(), m::generate_primary_keys(), m::Catalog::Get(), get_column_offset_in_bytes(), m::Type::Get_Integer(), m::gs::LinearSpace< T >::hi(), M_insist, m::storage::PAXLayoutFactory::NTuples, m::gs::GridSearch< Spaces >::num_points(), m::Catalog::pool(), m::Table::push_back(), m::gs::LinearSpace< T >::sequence(), m::set_all_not_null(), m::T(), and time_select_query_execution().
std::pair< Eigen::MatrixXd, Eigen::VectorXd > generate_training_suite_join | ( | ) |
Generates data for training join models.
Results are written in filepath.
Definition at line 322 of file CostModel.cpp.
References m::Catalog::add_database(), m::Database::add_table(), m::and, m::Catalog::create_store(), m::fill_uniform(), m::generate_primary_keys(), m::Catalog::Get(), get_column_offset_in_bytes(), m::Type::Get_Integer(), M_insist, m::storage::PAXLayoutFactory::NTuples, m::gs::GridSearch< Spaces >::num_points(), m::Catalog::pool(), m::Table::push_back(), m::gs::LinearSpace< T >::sequence(), m::set_all_not_null(), m::T(), and time_select_query_execution().
uint64_t get_column_offset_in_bytes | ( | const DataLayout & | layout, |
std::size_t | idx | ||
) |
Returns the offset in bytes of the idx-th column in the DataLayout layout, which is considered to represent a PAX-layout.
Definition at line 28 of file CostModel.cpp.
References m::storage::DataLayout::child(), and M_insist.
Referenced by generate_training_suite_filter(), generate_training_suite_group_by(), and generate_training_suite_join().
void save_csv | ( | const std::string & | csv_path, |
const Eigen::MatrixXd & | matrix, | ||
const std::string & | header = "" | ||
) |
Save matrix in a csv file.
Definition at line 37 of file CostModel.cpp.
Referenced by m::CostModelFactory::generate_filter_cost_model(), m::CostModelFactory::generate_group_by_cost_model(), and m::CostModelFactory::generate_join_cost_model().
Timer::duration time_select_query_execution | ( | Database & | DB, |
const std::string & | input | ||
) |
Executes the given query and returns the median of query execution times for all SELECT queries.
Definition at line 60 of file CostModel.cpp.
References m::execute_query(), m::Catalog::Get(), NUM_REPETITIONS, and m::statement_from_string().
Referenced by generate_training_suite_filter(), generate_training_suite_group_by(), and generate_training_suite_join().
|
static constexpr unsigned DEFAULT_FILTER_POLYNOMIAL_DEGREE = 9 |
Definition at line 17 of file CostModel.cpp.
Referenced by m::CostModelFactory::get_cost_function().
|
static constexpr std::size_t NUM_DISTINCT_VALUES_IN_FILTER_EXPERIMENT = 100 |
Definition at line 16 of file CostModel.cpp.
Referenced by generate_training_suite_filter().
|
constexpr unsigned NUM_REPETITIONS = 5 |
The number of times a benchmark should be repeated to reduce noise in the data.
Definition at line 19 of file CostModel.cpp.
Referenced by time_select_query_execution().