Branch data Line data Source code
1 : : #pragma once 2 : : 3 : : #include "util/hash.hpp" 4 : : #include <mutable/IR/QueryGraph.hpp> 5 : : #include <mutable/parse/AST.hpp> 6 : : #include <unordered_map> 7 : : #include <unordered_set> 8 : : #include <utility> 9 : : 10 : : 11 : : namespace m { 12 : : 13 : : struct GraphBuilder; 14 : : 15 : : /** Collects info of a subquery, i.e. the `GraphBuilder` holding the constructed subquery and all its associated 16 : : * information, as well as the alias assigned to the subquery. */ 17 : 0 : struct SubqueryInfo 18 : : { 19 : : const ast::QueryExpr &expr; 20 : : std::unique_ptr<GraphBuilder> builder; 21 : : ThreadSafePooledString alias; 22 : : 23 : 0 : SubqueryInfo(const ast::QueryExpr &expr, std::unique_ptr<GraphBuilder> builder, ThreadSafePooledString alias) 24 : 0 : : expr(expr) 25 : 0 : , builder(std::move(builder)) 26 [ # # ]: 0 : , alias(std::move(alias)) 27 : 0 : { } 28 : : }; 29 : : 30 : : /** Collects information of a `cnf::Clause`. */ 31 : : struct ClauseInfo 32 : : { 33 : : unsigned binding_depth = 0; 34 : : 35 : : std::unordered_set<ThreadSafePooledString> data_sources; 36 : : std::vector<SubqueryInfo> nested_queries; 37 : : 38 : : ClauseInfo(const cnf::Clause &clause); 39 : : 40 : 267 : bool is_constant() const { return data_sources.empty(); } 41 : 265 : bool is_selection() const { return data_sources.size() == 1; } 42 : : }; 43 : : 44 : : struct GraphBuilder : ast::ConstASTCommandVisitor 45 : : { 46 : : ///> maps a `cnf::Clause` to its `ClauseInfo` 47 : : using clause_map = std::unordered_map<cnf::Clause, ClauseInfo>; 48 : : 49 : : private: 50 : : /*----- Fields tracking correlation information -----*/ 51 : : ///> the grouping keys of the statement 52 : : std::vector<std::reference_wrapper<const ast::Expr>> existing_grouping_keys_; 53 : : ///> additionally required grouping keys to perform decorrelation 54 : : std::unordered_set<std::reference_wrapper<const ast::Expr>> additional_grouping_keys_; 55 : : 56 : : ///> to be handled by the current query 57 : : clause_map bound_clauses_; 58 : : ///> to be handled by an outer query 59 : : clause_map deferred_clauses_; 60 : : 61 : : ///> the query graph that is being constructed 62 : : std::unique_ptr<QueryGraph> graph_; 63 : : ///> maps `DataSource` names/aliases to the `DataSource` instance 64 : : std::unordered_map<ThreadSafePooledString, std::reference_wrapper<DataSource>> named_sources_; 65 : : ///> whether this graph needs grouping; either by explicily grouping or implicitly by using aggregations 66 : : bool needs_grouping_ = false; 67 : : 68 : : public: 69 : : GraphBuilder(); 70 : : 71 : : ///> returns the constructed `QueryGraph` 72 : 179 : std::unique_ptr<QueryGraph> get() { return std::move(graph_); } 73 : : 74 : : /*----- Visitor methods -----*/ 75 : : using ConstASTCommandVisitor::operator(); 76 : 179 : void operator()(Const<ast::Stmt> &s) { s.accept(*this); } 77 : 0 : void operator()(Const<ast::ErrorStmt>&) { M_unreachable("graph must not contain errors"); } 78 : 0 : void operator()(Const<ast::EmptyStmt>&) { /* nothing to be done */ } 79 : 0 : void operator()(Const<ast::CreateDatabaseStmt>&) { M_unreachable("not implemented"); } 80 : 0 : void operator()(Const<ast::DropDatabaseStmt>&) { M_unreachable("not implemented"); } 81 : 0 : void operator()(Const<ast::UseDatabaseStmt>&) { M_unreachable("not implemented"); } 82 : 0 : void operator()(Const<ast::CreateTableStmt>&) { M_unreachable("not implemented"); } 83 : 0 : void operator()(Const<ast::DropTableStmt>&) { M_unreachable("not implemented"); } 84 : : void operator()(Const<ast::SelectStmt> &s); 85 : 0 : void operator()(Const<ast::InsertStmt>&) { M_unreachable("not implemented"); } 86 : 0 : void operator()(Const<ast::UpdateStmt>&) { M_unreachable("not implemented"); } 87 : 0 : void operator()(Const<ast::DeleteStmt>&) { M_unreachable("not implemented"); } 88 : 0 : void operator()(Const<ast::DSVImportStmt>&) { M_unreachable("not implemented"); } 89 : : 90 : : /** Computes correlation information of \p clause. Analyzes the entire clause for how it can be decorrelated. 91 : : * 92 : : * 1. If the query does not have any grouping, clauses can be trivially decorrelated by lifting the clause into the 93 : : * join condition of the dependent join. 94 : : * 95 : : * If the query contains grouping, more in depth analysis is performed: 96 : : * 97 : : * 2. If the clause contains only bound variables, it is an uncorrelated selection of the query. 98 : : * 3. If the clause contains only free variables, it is an uncorrelated selection of an *outer* statement. 99 : : * 4. If it contains both bound and free variables, it is correlated and is analyzed more closely. Correlated 100 : : * clauses can be decorrelated if all bound *expressions* are composable of the grouping keys specified in the 101 : : * query. If it is not possible to compose all bound expressions from grouping keys, the clause cannot be 102 : : * decorrelated. 103 : : * 5. A special exception are clauses of a single equi-predicate, where one side of the equi-predicate contains 104 : : * only bound variables and the other contains only free variables. In that particular case, we can decorrelate 105 : : * the clause by introducing the bound expression as an additional grouping key to the query. 106 : : */ 107 : : void process_selection(cnf::Clause &clause); 108 : : }; 109 : : 110 : : }