LCOV - code coverage report
Current view: top level - src/IR - Optimizer.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 90 310 29.0 %
Date: 2025-03-25 01:19:55 Functions: 11 43 25.6 %
Branches: 84 760 11.1 %

           Branch data     Line data    Source code
       1                 :            : #include <mutable/IR/Optimizer.hpp>
       2                 :            : 
       3                 :            : #include <algorithm>
       4                 :            : #include <mutable/catalog/Catalog.hpp>
       5                 :            : #include <mutable/IR/Operator.hpp>
       6                 :            : #include <mutable/Options.hpp>
       7                 :            : #include <mutable/parse/AST.hpp>
       8                 :            : #include <mutable/storage/Store.hpp>
       9                 :            : #include <numeric>
      10                 :            : #include <vector>
      11                 :            : 
      12                 :            : 
      13                 :            : using namespace m;
      14                 :            : using namespace m::ast;
      15                 :            : 
      16                 :            : 
      17                 :            : /*======================================================================================================================
      18                 :            :  * Helper functions
      19                 :            :  *====================================================================================================================*/
      20                 :            : 
      21                 :          0 : struct WeighExpr
      22                 :            : {
      23                 :            :     private:
      24                 :          0 :     unsigned weight_ = 0;
      25                 :            : 
      26                 :            :     public:
      27                 :          0 :     operator unsigned() const { return weight_; }
      28                 :            : 
      29                 :            :     void operator()(const cnf::CNF &cnf) {
      30                 :            :         for (auto &clause : cnf)
      31                 :            :             (*this)(clause);
      32                 :            :     }
      33                 :            : 
      34                 :          0 :     void operator()(const cnf::Clause &clause) {
      35         [ #  # ]:          0 :         for (auto pred : clause)
      36                 :          0 :             (*this)(*pred);
      37                 :          0 :     }
      38                 :            : 
      39                 :          0 :     void operator()(const ast::Expr &e) {
      40                 :          0 :         visit(overloaded {
      41                 :          0 :             [](const ErrorExpr&) { M_unreachable("no errors at this stage"); },
      42                 :          0 :             [this](const Designator &d) {
      43         [ #  # ]:          0 :                 if (auto cs = cast<const CharacterSequence>(d.type()))
      44                 :          0 :                     weight_ += cs->length;
      45                 :            :                 else
      46                 :          0 :                     weight_ += 1;
      47                 :          0 :             },
      48                 :          0 :             [this](const Constant &e) {
      49         [ #  # ]:          0 :                 if (auto cs = cast<const CharacterSequence>(e.type()))
      50                 :          0 :                     weight_ += cs->length;
      51                 :            :                 // fixed-size constants are considered free, as they may be encoded as immediate constants in the instr
      52                 :          0 :             },
      53                 :          0 :             [this](const FnApplicationExpr&) {
      54                 :          0 :                 weight_ += 1;
      55                 :          0 :             },
      56                 :          0 :             [this](const UnaryExpr&) { weight_ += 1; },
      57                 :          0 :             [this](const BinaryExpr&) { weight_ += 1; },
      58                 :          0 :             [this](const QueryExpr&) { weight_ += 1000; } // XXX: this should never happen because of unnesting
      59                 :          0 :         }, e, tag<ConstPreOrderExprVisitor>{});
      60                 :          0 :     }
      61                 :            : };
      62                 :            : 
      63                 :          0 : std::vector<cnf::CNF> Optimizer::optimize_filter(cnf::CNF filter)
      64                 :            : {
      65                 :          0 :     constexpr unsigned MAX_WEIGHT = 12; // equals to 4 comparisons of fixed-length values
      66                 :          0 :     M_insist(not filter.empty());
      67                 :          0 : 
      68                 :            :     /* Compute clause weights. */
      69                 :          0 :     std::vector<unsigned> clause_weights;
      70         [ #  # ]:          0 :     clause_weights.reserve(filter.size());
      71         [ #  # ]:          0 :     for (auto &clause : filter) {
      72                 :          0 :         WeighExpr W;
      73         [ #  # ]:          0 :         W(clause);
      74         [ #  # ]:          1 :         clause_weights.emplace_back(W);
      75                 :            :     }
      76                 :            : 
      77                 :            :     /* Sort clauses by weight using an index vector. */
      78         [ #  # ]:          0 :     std::vector<std::size_t> order(filter.size(), 0);
      79         [ #  # ]:          0 :     std::iota(order.begin(), order.end(), 0);
      80         [ #  # ]:          0 :     std::sort(order.begin(), order.end(), [&clause_weights](std::size_t first, std::size_t second) -> bool {
      81                 :          0 :         return clause_weights[first] < clause_weights[second];
      82                 :            :     });
      83                 :            : 
      84                 :            :     /* Dissect filter into sequence of filters. */
      85                 :          0 :     std::vector<cnf::CNF> optimized_filters;
      86                 :          0 :     unsigned current_weight = 0;
      87                 :          0 :     cnf::CNF current_filter;
      88         [ #  # ]:          0 :     for (std::size_t i = 0, end = filter.size(); i != end; ++i) {
      89                 :          0 :         const std::size_t idx = order[i];
      90                 :          0 :         cnf::Clause clause(std::move(filter[idx]));
      91         [ #  # ]:          0 :         M_insist(not clause.empty());
      92                 :          0 :         const unsigned clause_weight = clause_weights[idx];
      93                 :            : 
      94   [ #  #  #  # ]:          0 :         if (not current_filter.empty() and current_weight + clause_weight > MAX_WEIGHT) {
      95         [ #  # ]:          0 :             optimized_filters.emplace_back(std::move(current_filter)); // empties current_filter
      96                 :          0 :             current_weight = 0;
      97                 :          0 :         }
      98                 :            : 
      99         [ #  # ]:          0 :         current_filter.emplace_back(std::move(clause));
     100                 :          0 :         current_weight += clause_weight;
     101                 :          0 :     }
     102         [ #  # ]:          0 :     if (not current_filter.empty())
     103         [ #  # ]:          0 :         optimized_filters.emplace_back(std::move(current_filter));
     104                 :            : 
     105         [ #  # ]:          0 :     M_insist(not optimized_filters.empty());
     106                 :          0 :     return optimized_filters;
     107         [ #  # ]:          0 : }
     108                 :            : 
     109                 :            : std::vector<Optimizer::projection_type>
     110                 :         81 : Optimizer::compute_projections_required_for_order_by(const std::vector<projection_type> &projections,
     111                 :            :                                                      const std::vector<order_type> &order_by)
     112                 :            : {
     113                 :         81 :     std::vector<Optimizer::projection_type> required_projections;
     114                 :            : 
     115                 :            :     /* Collect all required `Designator`s which are not included in the projection. */
     116                 :         81 :     auto get_required_designator = overloaded {
     117                 :         81 :         [&](const ast::Designator &d) -> void {
     118         [ #  # ]:          0 :             if (auto t = std::get_if<const Expr*>(&d.target())) { // refers to another expression?
     119                 :            :                 /*----- Find `t` in projections. -----*/
     120         [ #  # ]:          0 :                 for (auto &[expr, _] : projections) {
     121   [ #  #  #  # ]:          0 :                     if (*t == &expr.get())
     122                 :          0 :                         return; // found
     123                 :            :                 }
     124                 :          0 :             }
     125                 :            :             /*----- Find `d` in projections. -----*/
     126         [ #  # ]:          0 :             for (auto &[expr, alias] : projections) {
     127   [ #  #  #  #  :          0 :                 if (not alias.has_value() and d == expr.get())
                   #  # ]
     128                 :          0 :                     return; // found
     129                 :            :             }
     130         [ #  # ]:          0 :             required_projections.emplace_back(d, ThreadSafePooledOptionalString{});
     131                 :          0 :         },
     132                 :         81 :         [&](const ast::FnApplicationExpr &fn) -> void {
     133                 :            :             /*----- Find `fn` in projections. -----*/
     134         [ #  # ]:          0 :             for (auto &[expr, alias] : projections) {
     135   [ #  #  #  #  :          0 :                 if (not alias.has_value() and fn == expr.get())
                   #  # ]
     136                 :          0 :                     throw visit_skip_subtree(); // found
     137                 :            :             }
     138         [ #  # ]:          0 :             required_projections.emplace_back(fn, ThreadSafePooledOptionalString{});
     139                 :          0 :             throw visit_skip_subtree();
     140                 :          0 :         },
     141                 :          0 :         [](auto&&) -> void { /* nothing to be done */ },
     142                 :            :     };
     143                 :            :     /* Process the ORDER BY clause. */
     144         [ +  - ]:         81 :     for (auto [expr, _] : order_by)
     145         [ #  # ]:          0 :         visit(get_required_designator, expr.get(), m::tag<m::ast::ConstPreOrderExprVisitor>());
     146                 :            : 
     147                 :         81 :     return required_projections;
     148         [ +  - ]:         81 : }
     149                 :            : 
     150                 :            : 
     151                 :            : /*======================================================================================================================
     152                 :            :  * Optimizer
     153                 :            :  *====================================================================================================================*/
     154                 :            : 
     155                 :         81 : std::pair<std::unique_ptr<Producer>, PlanTableEntry> Optimizer::optimize(QueryGraph &G) const
     156                 :            : {
     157   [ -  -  +  - ]:         81 :     switch (Options::Get().plan_table_type)
     158                 :            :     {
     159                 :            :         case Options::PT_auto: {
     160                 :            :             /* Select most suitable type of plan table depending on the query graph structure.
     161                 :            :              * Currently a simple heuristic based on the number of data sources.
     162                 :            :              * TODO: Consider join edges too.  Eventually consider #CSGs. */
     163         [ +  - ]:         81 :             if (G.num_sources() <= 15) {
     164                 :         81 :                 auto [plan, PT] = optimize_with_plantable<PlanTableSmallOrDense>(G);
     165         [ -  + ]:         81 :                 return { std::move(plan), std::move(PT.get_final()) };
     166                 :         81 :             } else {
     167                 :          0 :                 auto [plan, PT] = optimize_with_plantable<PlanTableLargeAndSparse>(G);
     168         [ #  # ]:          0 :                 return { std::move(plan), std::move(PT.get_final()) };
     169                 :          0 :             }
     170                 :            :         }
     171                 :            : 
     172                 :            :         case Options::PT_SmallOrDense: {
     173                 :          0 :             auto [plan, PT] = optimize_with_plantable<PlanTableSmallOrDense>(G);
     174         [ #  # ]:          0 :             return { std::move(plan), std::move(PT.get_final()) };
     175                 :          0 :         }
     176                 :            : 
     177                 :            :         case Options::PT_LargeAndSparse: {
     178                 :          0 :             auto [plan, PT] = optimize_with_plantable<PlanTableLargeAndSparse>(G);
     179         [ #  # ]:          0 :             return { std::move(plan), std::move(PT.get_final()) };
     180                 :          0 :         }
     181                 :          0 :     }
     182                 :         81 : }
     183                 :            : 
     184                 :            : template<typename PlanTable>
     185                 :         81 : std::pair<std::unique_ptr<Producer>, PlanTable> Optimizer::optimize_with_plantable(QueryGraph &G) const
     186                 :            : {
     187                 :         81 :     PlanTable PT(G);
     188   [ +  -  #  # ]:         81 :     const auto num_sources = G.sources().size();
     189   [ +  -  #  # ]:         81 :     auto &C = Catalog::Get();
     190   [ +  -  +  -  :         81 :     auto &CE = C.get_database_in_use().cardinality_estimator();
             #  #  #  # ]
     191                 :            : 
     192   [ -  +  #  # ]:         81 :     if (num_sources == 0) {
     193   [ #  #  #  # ]:          0 :         PT.get_final().cost = 0; // no sources → no cost
     194   [ #  #  #  #  :          0 :         PT.get_final().model = CE.empty_model(); // XXX: should rather be 1 (single tuple) than empty
             #  #  #  # ]
     195   [ #  #  #  #  :          0 :         return { std::make_unique<ProjectionOperator>(G.projections()), std::move(PT) };
          #  #  #  #  #  
                #  #  # ]
     196                 :            :     }
     197                 :            : 
     198                 :            :     /*----- Initialize plan table and compute plans for data sources. -----*/
     199   [ +  -  #  # ]:         81 :     auto source_plans = optimize_source_plans(G, PT);
     200                 :            : 
     201                 :            :     /*----- Compute join order and construct plan containing all joins. -----*/
     202   [ +  -  #  # ]:         81 :     optimize_join_order(G, PT);
     203   [ +  -  #  # ]:         81 :     std::unique_ptr<Producer> plan = construct_join_order(G, PT, source_plans);
     204   [ +  -  #  # ]:         81 :     auto &entry = PT.get_final();
     205                 :            : 
     206                 :            :     /*----- Construct plan for remaining operations. -----*/
     207   [ +  -  #  # ]:         81 :     plan = optimize_plan(G, std::move(plan), entry);
     208                 :            : 
     209   [ +  -  #  # ]:         81 :     return { std::move(plan), std::move(PT) };
     210                 :         81 : }
     211                 :            : 
     212                 :            : template<typename PlanTable>
     213                 :         81 : std::unique_ptr<Producer*[]> Optimizer::optimize_source_plans(const QueryGraph &G, PlanTable &PT) const
     214                 :            : {
     215                 :         81 :     const auto num_sources = G.sources().size();
     216                 :         81 :     auto &CE = Catalog::Get().get_database_in_use().cardinality_estimator();
     217                 :            : 
     218                 :         81 :     auto source_plans = std::make_unique<Producer*[]>(num_sources);
     219   [ +  -  +  +  :        162 :     for (auto &ds : G.sources()) {
             #  #  #  # ]
     220   [ +  -  +  -  :         81 :         Subproblem s = Subproblem::Singleton(ds->id());
             #  #  #  # ]
     221   [ +  -  +  -  :         81 :         if (auto bt = cast<const BaseTable>(ds.get())) {
             #  #  #  # ]
     222                 :            :             /* Produce a scan for base tables. */
     223   [ +  -  #  # ]:         81 :             PT[s].cost = 0;
     224   [ +  -  +  -  :         81 :             PT[s].model = CE.estimate_scan(G, s);
             #  #  #  # ]
     225   [ +  -  +  -  :         81 :             auto &store = bt->table().store();
             #  #  #  # ]
     226   [ +  -  +  -  :         81 :             auto source = std::make_unique<ScanOperator>(store, bt->name().assert_not_none());
          +  -  #  #  #  
                #  #  # ]
     227                 :            : 
     228                 :            :             /* Set operator information. */
     229   [ +  -  #  # ]:         81 :             auto source_info = std::make_unique<OperatorInformation>();
     230                 :         81 :             source_info->subproblem = s;
     231   [ +  -  +  -  :         81 :             source_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model);
             #  #  #  # ]
     232   [ +  -  #  # ]:         81 :             source->info(std::move(source_info));
     233                 :            : 
     234   [ +  -  +  -  :         81 :             source_plans[ds->id()] = source.release();
             #  #  #  # ]
     235                 :         81 :         } else {
     236                 :            :             /* Recursively solve nested queries. */
     237   [ #  #  #  # ]:          0 :             auto &Q = as<const Query>(*ds);
     238   [ #  #  #  #  :          0 :             const bool old = std::exchange(needs_projection_, Q.alias().has_value()); // aliased nested queries need projection
             #  #  #  # ]
     239   [ #  #  #  #  :          0 :             auto [sub_plan, sub] = optimize(Q.query_graph());
             #  #  #  # ]
     240                 :          0 :             needs_projection_ = old;
     241                 :            : 
     242                 :            :             /* If an alias for the nested query is given, prefix every attribute with the alias. */
     243   [ #  #  #  #  :          0 :             if (Q.alias().has_value()) {
          #  #  #  #  #  
                #  #  # ]
     244   [ #  #  #  #  :          0 :                 M_insist(is<ProjectionOperator>(sub_plan), "only projection may rename attributes");
             #  #  #  # ]
     245                 :          0 :                 Schema S;
     246   [ #  #  #  #  :          0 :                 for (auto &e : sub_plan->schema())
          #  #  #  #  #  
          #  #  #  #  #  
                   #  # ]
     247   [ #  #  #  #  :          0 :                     S.add({ Q.alias(), e.id.name }, e.type, e.constraints);
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
                      # ]
     248   [ #  #  #  #  :          0 :                 sub_plan->schema() = S;
             #  #  #  # ]
     249                 :          0 :             }
     250                 :            : 
     251                 :            :             /* Update the plan table with the `DataModel` and cost of the nested query and save the plan in the array of
     252                 :            :              * source plans. */
     253   [ #  #  #  # ]:          0 :             PT[s].cost = sub.cost;
     254   [ #  #  #  # ]:          0 :             sub.model->assign_to(s); // adapt model s.t. it describes the result of the current subproblem
     255   [ #  #  #  # ]:          0 :             PT[s].model = std::move(sub.model);
     256   [ #  #  #  #  :          0 :             source_plans[ds->id()] = sub_plan.release();
             #  #  #  # ]
     257                 :          0 :         }
     258                 :            : 
     259                 :            :         /* Apply filter, if any. */
     260   [ +  -  -  +  :         81 :         if (ds->filter().size()) {
             #  #  #  # ]
     261                 :            :             /* Optimize the filter by splitting into smaller filters and ordering them. */
     262   [ #  #  #  #  :          0 :             std::vector<cnf::CNF> filters = Optimizer::optimize_filter(ds->filter());
          #  #  #  #  #  
                #  #  # ]
     263   [ #  #  #  #  :          0 :             Producer *filtered_ds = source_plans[ds->id()];
             #  #  #  # ]
     264                 :            : 
     265                 :            :             /* Construct a plan as a sequence of filters. */
     266   [ #  #  #  # ]:          0 :             for (auto &&filter : filters) {
     267                 :            :                 /* Update data model with filter. */
     268   [ #  #  #  #  :          0 :                 auto new_model = CE.estimate_filter(G, *PT[s].model, filter);
             #  #  #  # ]
     269   [ #  #  #  # ]:          0 :                 PT[s].model = std::move(new_model);
     270                 :            : 
     271   [ #  #  #  #  :          0 :                 if (filter.size() == 1 and filter[0].size() > 1) { // disjunctive filter
             #  #  #  # ]
     272   [ #  #  #  # ]:          0 :                     auto tmp = std::make_unique<DisjunctiveFilterOperator>(std::move(filter));
     273   [ #  #  #  # ]:          0 :                     tmp->add_child(filtered_ds);
     274                 :          0 :                     filtered_ds = tmp.release();
     275                 :          0 :                 } else {
     276   [ #  #  #  # ]:          0 :                     auto tmp = std::make_unique<FilterOperator>(std::move(filter));
     277   [ #  #  #  # ]:          0 :                     tmp->add_child(filtered_ds);
     278                 :          0 :                     filtered_ds = tmp.release();
     279                 :          0 :                 }
     280                 :            : 
     281                 :            :                 /* Set operator information. */
     282   [ #  #  #  # ]:          0 :                 auto source_info = std::make_unique<OperatorInformation>();
     283                 :          0 :                 source_info->subproblem = s;
     284   [ #  #  #  #  :          0 :                 source_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model); // includes filters, if any
             #  #  #  # ]
     285   [ #  #  #  # ]:          0 :                 filtered_ds->info(std::move(source_info));
     286                 :          0 :             }
     287                 :            : 
     288   [ #  #  #  #  :          0 :             source_plans[ds->id()] = filtered_ds;
             #  #  #  # ]
     289                 :          0 :         }
     290                 :            :     }
     291                 :         81 :     return source_plans;
     292   [ +  -  #  # ]:         81 : }
     293                 :            : 
     294                 :            : template<typename PlanTable>
     295                 :         81 : void Optimizer::optimize_join_order(const QueryGraph &G, PlanTable &PT) const
     296                 :            : {
     297                 :         81 :     Catalog &C = Catalog::Get();
     298                 :         81 :     auto &CE = C.get_database_in_use().cardinality_estimator();
     299                 :            : 
     300                 :            : #ifndef NDEBUG
     301   [ +  -  #  # ]:         81 :     if (Options::Get().statistics) {
     302                 :          0 :         std::size_t num_CSGs = 0, num_CCPs = 0;
     303                 :          0 :         const Subproblem All = Subproblem::All(G.num_sources());
     304                 :          0 :         auto inc_CSGs = [&num_CSGs](Subproblem) { ++num_CSGs; };
     305                 :          0 :         auto inc_CCPs = [&num_CCPs](Subproblem, Subproblem) { ++num_CCPs; };
     306   [ #  #  #  # ]:          0 :         G.adjacency_matrix().for_each_CSG_undirected(All, inc_CSGs);
     307   [ #  #  #  # ]:          0 :         G.adjacency_matrix().for_each_CSG_pair_undirected(All, inc_CCPs);
     308                 :          0 :         std::cout << num_CSGs << " CSGs, " << num_CCPs << " CCPs" << std::endl;
     309                 :          0 :     }
     310                 :            : #endif
     311                 :            : 
     312   [ +  -  -  +  :        162 :     M_TIME_EXPR(plan_enumerator()(G, cost_function(), PT), "Plan enumeration", C.timer());
          +  -  +  -  +  
          -  #  #  #  #  
                   #  # ]
     313                 :            : 
     314   [ +  -  #  # ]:         81 :     if (Options::Get().statistics) {
     315                 :          0 :         std::cout << "Est. total cost: " << PT.get_final().cost
     316                 :          0 :                   << "\nEst. result set size: " << CE.predict_cardinality(*PT.get_final().model)
     317                 :          0 :                   << "\nPlan cost: " << PT[PT.get_final().left].cost + PT[PT.get_final().right].cost
     318                 :          0 :                   << std::endl;
     319                 :          0 :     }
     320                 :         81 : }
     321                 :            : 
     322                 :            : template<typename PlanTable>
     323                 :         81 : std::unique_ptr<Producer> Optimizer::construct_join_order(const QueryGraph &G, const PlanTable &PT,
     324                 :            :                                                           const std::unique_ptr<Producer*[]> &source_plans) const
     325                 :            : {
     326                 :         81 :     auto &CE = Catalog::Get().get_database_in_use().cardinality_estimator();
     327                 :            : 
     328                 :         81 :     std::vector<std::reference_wrapper<Join>> joins;
     329   [ +  -  +  -  :         81 :     for (auto &J : G.joins()) joins.emplace_back(*J);
          #  #  #  #  #  
                #  #  # ]
     330                 :            : 
     331                 :            :     /* Use nested lambdas to implement recursive lambda using CPS. */
     332                 :        162 :     const auto construct_recursive = [&](Subproblem s) -> Producer* {
     333                 :        162 :         auto construct_plan_impl = [&](Subproblem s, auto &construct_plan_rec) -> Producer* {
     334                 :         81 :             auto subproblems = PT[s].get_subproblems();
     335   [ -  +  #  # ]:         81 :             if (subproblems.empty()) {
     336   [ +  -  #  # ]:         81 :                 M_insist(s.size() == 1);
     337   [ +  -  +  -  :         81 :                 return source_plans[*s.begin()];
          +  -  #  #  #  
                #  #  # ]
     338                 :            :             } else {
     339                 :            :                 /* Compute plan for each sub problem.  Must happen *before* calculating the join predicate. */
     340                 :          0 :                 std::vector<Producer*> sub_plans;
     341   [ #  #  #  # ]:          0 :                 for (auto sub : subproblems)
     342   [ #  #  #  #  :          0 :                     sub_plans.push_back(construct_plan_rec(sub, construct_plan_rec));
             #  #  #  # ]
     343                 :            : 
     344                 :            :                 /* Calculate the join predicate. */
     345                 :          0 :                 cnf::CNF join_condition;
     346   [ #  #  #  # ]:          0 :                 for (auto it = joins.begin(); it != joins.end(); ) {
     347   [ #  #  #  # ]:          0 :                     Subproblem join_sources;
     348                 :            :                     /* Compute subproblem of sources to join. */
     349   [ #  #  #  #  :          0 :                     for (auto ds : it->get().sources())
                   #  # ]
     350   [ #  #  #  #  :          0 :                         join_sources(ds.get().id()) = true;
             #  #  #  # ]
     351                 :            : 
     352   [ #  #  #  #  :          0 :                     if (join_sources.is_subset(s)) { // possible join
                   #  # ]
     353   [ #  #  #  #  :          0 :                         join_condition = join_condition and it->get().condition();
                   #  # ]
     354   [ #  #  #  # ]:          0 :                         it = joins.erase(it);
     355                 :          0 :                     } else {
     356                 :          0 :                         ++it;
     357                 :            :                     }
     358                 :            :                 }
     359                 :            : 
     360                 :            :                 /* Construct the join. */
     361   [ #  #  #  # ]:          0 :                 auto join = std::make_unique<JoinOperator>(join_condition);
     362   [ #  #  #  # ]:          0 :                 for (auto sub_plan : sub_plans)
     363   [ #  #  #  # ]:          0 :                     join->add_child(sub_plan);
     364   [ #  #  #  # ]:          0 :                 auto join_info = std::make_unique<OperatorInformation>();
     365                 :          0 :                 join_info->subproblem = s;
     366   [ #  #  #  #  :          0 :                 join_info->estimated_cardinality = CE.predict_cardinality(*PT[s].model);
             #  #  #  # ]
     367                 :          0 :                 join->info(std::move(join_info));
     368                 :          0 :                 return join.release();
     369                 :          0 :             }
     370                 :         81 :         };
     371                 :         81 :         return construct_plan_impl(s, construct_plan_impl);
     372                 :            :     };
     373                 :            : 
     374   [ +  -  +  -  :         81 :     return std::unique_ptr<Producer>(construct_recursive(Subproblem::All(G.sources().size())));
          +  -  #  #  #  
                #  #  # ]
     375                 :         81 : }
     376                 :            : 
     377                 :         81 : std::unique_ptr<Producer> Optimizer::optimize_plan(const QueryGraph &G, std::unique_ptr<Producer> plan,
     378                 :            :                                                    PlanTableEntry &entry) const
     379                 :            : {
     380                 :         81 :     auto &CE = Catalog::Get().get_database_in_use().cardinality_estimator();
     381                 :            : 
     382                 :            :     /* Perform grouping. */
     383         [ +  - ]:         81 :     if (not G.group_by().empty()) {
     384                 :            :         /* Compute `DataModel` after grouping. */
     385                 :          0 :         auto new_model = CE.estimate_grouping(G, *entry.model, G.group_by()); // TODO provide aggregates
     386                 :          0 :         entry.model = std::move(new_model);
     387                 :            :         // TODO pick "best" algorithm
     388   [ #  #  #  #  :          0 :         auto group_by = std::make_unique<GroupingOperator>(G.group_by(), G.aggregates());
                   #  # ]
     389         [ #  # ]:          0 :         group_by->add_child(plan.release());
     390                 :            : 
     391                 :            :         /* Set operator information. */
     392         [ #  # ]:          0 :         auto info = std::make_unique<OperatorInformation>();
     393   [ #  #  #  # ]:          0 :         info->subproblem = Subproblem::All(G.sources().size());
     394         [ #  # ]:          0 :         info->estimated_cardinality = CE.predict_cardinality(*entry.model);
     395                 :            : 
     396         [ #  # ]:          0 :         group_by->info(std::move(info));
     397                 :          0 :         plan = std::move(group_by);
     398         [ +  - ]:         81 :     } else if (not G.aggregates().empty()) {
     399                 :            :         /* Compute `DataModel` after grouping. */
     400         [ #  # ]:          0 :         auto new_model = CE.estimate_grouping(G, *entry.model, std::vector<GroupingOperator::group_type>());
     401                 :          0 :         entry.model = std::move(new_model);
     402   [ #  #  #  # ]:          0 :         auto agg = std::make_unique<AggregationOperator>(G.aggregates());
     403         [ #  # ]:          0 :         agg->add_child(plan.release());
     404                 :            : 
     405                 :            :         /* Set operator information. */
     406         [ #  # ]:          0 :         auto info = std::make_unique<OperatorInformation>();
     407   [ #  #  #  # ]:          0 :         info->subproblem = Subproblem::All(G.sources().size());
     408         [ #  # ]:          0 :         info->estimated_cardinality = CE.predict_cardinality(*entry.model);
     409                 :            : 
     410         [ #  # ]:          0 :         agg->info(std::move(info));
     411                 :          0 :         plan = std::move(agg);
     412                 :          0 :     }
     413                 :            : 
     414                 :         81 :     auto additional_projections = Optimizer::compute_projections_required_for_order_by(G.projections(), G.order_by());
     415                 :         81 :     const bool requires_post_projection = not additional_projections.empty();
     416                 :            : 
     417                 :            :     /* Perform projection. */
     418   [ +  -  +  - ]:         81 :     if (not additional_projections.empty() or not G.projections().empty()) {
     419                 :            :         /* Merge original projections with additional projections. */
     420   [ +  +  +  -  :          0 :         additional_projections.insert(additional_projections.end(), G.projections().begin(), G.projections().end());
                   +  - ]
     421         [ +  - ]:         81 :         auto projection = std::make_unique<ProjectionOperator>(std::move(additional_projections));
     422         [ +  - ]:         81 :         projection->add_child(plan.release());
     423                 :            : 
     424                 :            :         /* Set operator information. */
     425         [ +  - ]:         81 :         auto info = std::make_unique<OperatorInformation>();
     426   [ +  -  +  - ]:         81 :         info->subproblem = Subproblem::All(G.sources().size());
     427   [ +  -  +  - ]:         81 :         info->estimated_cardinality = projection->child(0)->info().estimated_cardinality;
     428                 :            : 
     429         [ +  - ]:         81 :         projection->info(std::move(info));
     430                 :         81 :         plan = std::move(projection);
     431                 :         81 :     }
     432                 :            : 
     433                 :            :     /* Perform ordering. */
     434   [ +  +  -  + ]:        162 :     if (not G.order_by().empty()) {
     435                 :            :         // TODO estimate data model
     436   [ #  #  #  # ]:          0 :         auto order_by = std::make_unique<SortingOperator>(G.order_by());
     437         [ #  # ]:          0 :         order_by->add_child(plan.release());
     438                 :            : 
     439                 :            :         /* Set operator information. */
     440         [ #  # ]:          0 :         auto info = std::make_unique<OperatorInformation>();
     441   [ #  #  #  # ]:          0 :         info->subproblem = Subproblem::All(G.sources().size());
     442   [ #  #  #  # ]:          0 :         info->estimated_cardinality = order_by->child(0)->info().estimated_cardinality;
     443                 :            : 
     444         [ #  # ]:          0 :         order_by->info(std::move(info));
     445                 :          0 :         plan = std::move(order_by);
     446                 :          0 :     }
     447                 :            : 
     448                 :            :     /* Limit. */
     449   [ +  +  +  -  :         81 :     if (G.limit().limit or G.limit().offset) {
             +  -  -  + ]
     450                 :            :         /* Compute `DataModel` after limit. */
     451   [ #  #  #  #  :          0 :         auto new_model = CE.estimate_limit(G, *entry.model, G.limit().limit, G.limit().offset);
                   #  # ]
     452                 :          0 :         entry.model = std::move(new_model);
     453                 :            :         // TODO estimate data model
     454   [ #  #  #  #  :          0 :         auto limit = std::make_unique<LimitOperator>(G.limit().limit, G.limit().offset);
                   #  # ]
     455         [ #  # ]:          0 :         limit->add_child(plan.release());
     456                 :            : 
     457                 :            :         /* Set operator information. */
     458         [ #  # ]:          0 :         auto info = std::make_unique<OperatorInformation>();
     459   [ #  #  #  # ]:          0 :         info->subproblem = Subproblem::All(G.sources().size());
     460         [ #  # ]:          0 :         info->estimated_cardinality = CE.predict_cardinality(*entry.model);
     461                 :            : 
     462         [ #  # ]:          0 :         limit->info(std::move(info));
     463                 :          0 :         plan = std::move(limit);
     464                 :          0 :     }
     465                 :            : 
     466                 :            :     /* Perform post-ordering projection. */
     467   [ +  -  +  -  :         81 :     if (requires_post_projection or (not is<ProjectionOperator>(plan) and needs_projection_)) {
             -  +  #  # ]
     468                 :            :         // TODO estimate data model
     469                 :            :         /* Change aliased projections in designators with the alias as name since original projection is
     470                 :            :          * performed beforehand. */
     471                 :          0 :         std::vector<projection_type> adapted_projections;
     472   [ #  #  #  #  :          0 :         for (auto [expr, alias] : G.projections()) {
                   #  # ]
     473   [ #  #  #  # ]:          0 :             if (alias.has_value()) {
     474   [ #  #  #  #  :          0 :                 Token name(expr.get().tok.pos, alias.assert_not_none(), TK_IDENTIFIER);
                   #  # ]
     475   [ #  #  #  #  :          0 :                 auto d = std::make_unique<const Designator>(Token::CreateArtificial(), Token::CreateArtificial(),
                   #  # ]
     476         [ #  # ]:          0 :                                                             std::move(name), expr.get().type(), &expr.get());
     477         [ #  # ]:          0 :                 adapted_projections.emplace_back(*d, ThreadSafePooledOptionalString{});
     478         [ #  # ]:          0 :                 created_exprs_.emplace_back(std::move(d));
     479                 :          0 :             } else {
     480         [ #  # ]:          0 :                 adapted_projections.emplace_back(expr, ThreadSafePooledOptionalString{});
     481                 :            :             }
     482                 :          0 :         }
     483         [ #  # ]:          0 :         auto projection = std::make_unique<ProjectionOperator>(std::move(adapted_projections));
     484         [ #  # ]:          0 :         projection->add_child(plan.release());
     485                 :            : 
     486                 :            :         /* Set operator information. */
     487         [ #  # ]:          0 :         auto info = std::make_unique<OperatorInformation>();
     488   [ #  #  #  # ]:          0 :         info->subproblem = Subproblem::All(G.sources().size());
     489   [ #  #  #  # ]:          0 :         info->estimated_cardinality = projection->child(0)->info().estimated_cardinality;
     490                 :            : 
     491         [ #  # ]:          0 :         projection->info(std::move(info));
     492                 :          0 :         plan = std::move(projection);
     493                 :          0 :     }
     494                 :         81 :     return plan;
     495                 :        405 : }
     496                 :            : 
     497                 :            : #define DEFINE(PLANTABLE) \
     498                 :            : template \
     499                 :            : std::pair<std::unique_ptr<Producer>, PLANTABLE> \
     500                 :            : Optimizer::optimize_with_plantable(QueryGraph&) const; \
     501                 :            : template \
     502                 :            : std::unique_ptr<Producer*[]> \
     503                 :            : Optimizer::optimize_source_plans(const QueryGraph&, PLANTABLE&) const; \
     504                 :            : template \
     505                 :            : void \
     506                 :            : Optimizer::optimize_join_order(const QueryGraph&, PLANTABLE&) const; \
     507                 :            : template \
     508                 :            : std::unique_ptr<Producer> \
     509                 :            : Optimizer::construct_join_order(const QueryGraph&, const PLANTABLE&, const std::unique_ptr<Producer*[]>&) const
     510                 :            : DEFINE(PlanTableSmallOrDense);
     511                 :            : DEFINE(PlanTableLargeAndSparse);
     512                 :            : #undef DEFINE

Generated by: LCOV version 1.16