Branch data Line data Source code
1 : : #include "IR/QueryGraph.hpp"
2 : :
3 : : #include "IR/QueryGraph2SQL.hpp"
4 : : #include "parse/ASTDumper.hpp"
5 : : #include <mutable/catalog/Catalog.hpp>
6 : : #include <mutable/IR/CNF.hpp>
7 : : #include <mutable/parse/AST.hpp>
8 : : #include <mutable/util/macro.hpp>
9 : :
10 : :
11 : : using namespace m;
12 : :
13 : :
14 : : struct Decorrelation;
15 : :
16 : :
17 : : /*======================================================================================================================
18 : : * Query
19 : : *====================================================================================================================*/
20 : :
21 : 2 : bool Query::is_correlated() const { return query_graph_->is_correlated(); }
22 : :
23 : :
24 : : /*======================================================================================================================
25 : : * Decorrelation of nested queries
26 : : * -------------------------------
27 : : *
28 : : * DEFINITION: A predicate is *correlated* if it contains at least one *free* variable *and* at least one *bound*
29 : : * variable.
30 : : *
31 : : * NOTE: A free variable *must* be bound by an outer statement, otherwise the query is ill-formed (and should not have
32 : : * passed semantic analysis).
33 : 280 : *
34 : : * EXAMPLES:
35 : : * (1)
36 : : * ```
37 : : * SELECT T.id
38 : : * FROM T
39 : : * WHERE T.n = (
40 : : * SELECT COUNT(*)
41 : : * FROM S
42 : : * WHERE S.x = T.y -- T.y is a free variable, bound by the outer statement.
43 : : * )
44 : : * ```
45 : : *
46 : : * ```
47 : : * ⧑ (T.n = S.$agg0) ← dependent join
48 : : * / \
49 : : * T Γ (; COUNT(*) AS $agg0)
50 : : * |
51 : : * σ (S.x = T.y) ← correlated predicate
52 : : * |
53 : : * S
54 : : * ```
55 : : *
56 : : * To decorrelate T.y, group S by S.x, then lift join condition above grouping:
57 : : *
58 : : * ```
59 : : * SELECT T.id
60 : : * FROM T, (
61 : : * SELECT S.x, COUNT(*) AS $agg0
62 : : * FROM S
63 : : * GROUP BY S.x -- group by the correlated equi-predicate
64 : : * ) AS S
65 : : * WHERE S.x = T.y -- unnest the inner query by joining with outer on the equi-predicate
66 : 179 : * AND T.n = S.$agg0 -- and join on the transformed original predicate
67 : 0 : * ```
68 : : *
69 : : * ```
70 : : * ⨝ (S.x = T.y AND T.n = S.$agg0) ← regular join
71 : : * / \
72 : : * T Γ (S.x; COUNT(*) AS $agg0) ← group by attribute of correlated predicate
73 : : * |
74 : 1 : * S
75 : : * ```
76 : : *
77 : : * (2)
78 : : * ```
79 : : * SELECT T.id
80 : : * FROM T
81 : : * WHERE T.n = (
82 : : * SELECT COUNT(*)
83 : : * FROM S
84 : : * WHERE S.x = ISNULL(T.y) -- T.y is a free variable, bound by the outer statement.
85 : : * )
86 : : * ```
87 : : *
88 : : * ```
89 : : * ⧑ (T.n = S.$agg0) ← dependent join
90 : : * / \
91 : : * T Γ (; COUNT(*) AS $agg0)
92 : : * |
93 : : * σ (S.x = ISNULL(T.y)) ← correlated predicate
94 : : * |
95 : : * S
96 : : * ```
97 : : *
98 : : * To decorrelate T.y, group S by S.x, then lift join condition above grouping:
99 : : *
100 : : * ```
101 : : * SELECT T.id
102 : : * FROM T, (
103 : : * SELECT S.x, COUNT(*) as $agg0
104 : : * FROM S
105 : : * GROUP BY S.x -- group by the correlated equi-predicate
106 : : * ) AS S
107 : : * WHERE S.x = ISNULL(T.y) -- unnest the inner query by joining with outer on the equi-predicate
108 : : * AND T.n = S.$agg0 -- and join on the transformed original predicate
109 : : * ```
110 : : *
111 : : * ```
112 : : * ⨝ (S.x = ISNULL(T.y) AND T.n = S.$agg0) ← dependent join
113 : : * / \
114 : : * T Γ (S.x; COUNT(*) AS $agg0) ← group by attribute of correlated predicate
115 : : * |
116 : : * S
117 : : * ```
118 : : *
119 : : * (3)
120 : : * ```
121 : : * SELECT T.id
122 : : * FROM T
123 : : * WHERE T.n = (
124 : : * SELECT COUNT(*)
125 : : * FROM S
126 : : * WHERE ISNULL(S.x) = T.y -- T.y is a free variable, bound by the outer statement.
127 : : * )
128 : : * ```
129 : : *
130 : : * is converted to
131 : : *
132 : : * ```
133 : : * SELECT T.id
134 : : * FROM T, (
135 : : * SELECT $expr0, COUNT(*) AS $agg0
136 : : * FROM S
137 : : * GROUP BY ISNULL(S.x) AS $expr0
138 : : * ) AS S
139 : : * WHERE S.$expr0 = T.y
140 : : * AND T.n = S.$agg0
141 : : * ```
142 : : *
143 : : * (4)
144 : : * ```
145 : : * SELECT T.id
146 : : * FROM T
147 : : * WHERE EXISTS (
148 : : * SELECT 1
149 : : * FROM S
150 : : * WHERE S.a + S.b = T.c -- T.c is a free variable, bound by the outer statement.
151 : : * )
152 : : * ```
153 : : *
154 : : * ```
155 : : * ⧑ (TRUE) ← dependent join without condition (not a Cartesian product!)
156 : : * / \
157 : : * T EXISTS
158 : : * |
159 : : * σ (S.a + S.b = T.c) ← correlated predicate
160 : : * |
161 : : * S
162 : : * ```
163 : : *
164 : : * is converted to
165 : : *
166 : : * ```
167 : : * SELECT T.id
168 : : * FROM T, (
169 : : * SELECT $expr0
170 : : * FROM S
171 : : * GROUP BY S.a + S.b AS $expr0
172 : : * ) AS S
173 : : * WHERE T.c = S.$expr0
174 : : * ```
175 : : *
176 : : * ```
177 : : * ⨝ (T.c = S.$expr0)
178 : : * / \
179 : : * T Γ (S.a + S.b AS $expr0; )
180 : : * |
181 : : * S
182 : : * ```
183 : : *
184 : : * (5)
185 : : * ```
186 : : * SELECT T.id
187 : : * FROM T
188 : : * WHERE EXISTS (
189 : : * SELECT 1
190 : : * FROM S
191 : : * WHERE S.a + T.b = S.c -- T.b is a free variable, bound by the outer statement.
192 : : * )
193 : : * ```
194 : 184 : *
195 : : * ```
196 : : * ⧑ (TRUE) ← dependent join without condition (not a Cartesian product!)
197 : : * / \
198 : : * T EXISTS
199 : : * |
200 : : * σ (S.a + T.b = S.c) ← correlated predicate
201 : : * |
202 : : * S
203 : : * ```
204 : : *
205 : : * **cannot** be converted to
206 : : *
207 : : * ```
208 : : * SELECT T.id
209 : : * FROM T, (
210 : : * SELECT S.a, S.c
211 : : * FROM S
212 : : * GROUP BY S.a, S.c -- FIXME This is incorrect. We should transform the predicate and group by S.c - S.a
213 : : * ) AS S
214 : : * WHERE S.a + T.b = S.c -- TODO Hypothetically, we must rewrite to T.b = S.c - S.a to have single group
215 : : * ```
216 : : *
217 : : * ```
218 : : * ⨝ (T.b = S.a + S.c)
219 : : * / \
220 : : * T Γ (S.a, S.b; )
221 : : * |
222 : : * S
223 : : * ```
224 : : *
225 : : * (6)
226 : : * ```
227 : : * SELECT T.id
228 : : * FROM T
229 : : * WHERE T.max = (
230 : : * SELECT MAX(S.a * T.b)
231 : : * FROM S
232 : : * WHERE S.c = T.c
233 : : * )
234 : : * ```
235 : : *
236 : : * is converted to
237 : : *
238 : : * ```
239 : : * SELECT T.id
240 : : * FROM T, (
241 : : * SELECT S.a, S.c
242 : : * FROM S
243 : : * GROUP BY S.a, S.c
244 : : * ) AS S
245 : : * WHERE T.max = S.a * T.b
246 : : * AND T.c = S.c
247 : : * ```
248 : : *
249 : : * (7)
250 : : * ```
251 : : * SELECT T.id
252 : : * FROM T
253 : : * WHERE T.max != ( -- non-equi predicate here is valid, its not a correlated predicate
254 : : * SELECT COUNT(*)
255 : : * FROM S
256 : : * WHERE S.x = T.y
257 : : * )
258 : : * ```
259 : : *
260 : : * is converted to
261 : : *
262 : : * ```
263 : : * SELECT T.id
264 : : * FROM T, (
265 : : * SELECT S.x, COUNT(*)
266 : : * FROM S
267 : : * GROUP BY S.x
268 : : * ) AS S
269 : : * WHERE T.max != S.x
270 : : * AND S.x = T.y
271 : : * ```
272 : : *
273 : : * (8)
274 : : * ```
275 : : * SELECT T.id
276 : : * FROM T
277 : : * WHERE T.max == (
278 : : * SELECT COUNT(*)
279 : : * FROM S
280 : : * WHERE S.x != T.y -- non-equi predicate here is correlated
281 : : * )
282 : : * ```
283 : : *
284 : : * ```
285 : : * ⧑ (T.max = $agg0) ← dependent join
286 : : * / \
287 : : * T Γ (; COUNT(*) AS $agg0)
288 : : * |
289 : : * σ (S.x != T.y) ← correlated predicate
290 : : * |
291 : : * S
292 : : * ```
293 : : *
294 : : * We currently can't do better than a dependent join. We cannout lift the correlated predicate above the grouping
295 : : * of the correlated query.
296 : : * We could lift the correlated predicate and the grouping above the dependent join. This would transform the
297 : : * dependent join to a regular join with the correlated predicate as join predicate and the original join predicate
298 : : * hoisted above grouping. See below:
299 : : *
300 : : * ```
301 : : * σ (T.max = $agg0) ← dependent join
302 : : * |
303 : : * Γ (T.id; COUNT(*) AS $agg0) ← grouping by PK of T implies grouping by all of T
304 : : * |
305 : : * ⨝ (S.x != T.y) ← formerly correlated predicate becomes decorrelated join predicate
306 : : * / \
307 : : * T S
308 : : * ```
309 : : *
310 : : * TASKS:
311 : : *
312 : : * (1) Analyze the WHERE and HAVING clauses of a query for correlations.
313 : : *
314 : : * (2) To analyze a clause, we must analyze each predicate individually. So far, we only support a single predicate
315 : : * inside a correlated clause. Abort, if a correlated clause has more than one predicate.
316 : : *
317 : : * (3) For each predicate, check whether it has free variables. If this is not the case, we treat it as a regular
318 : : * predicate.
319 : : *
320 : : * (4) If all predicates of a clause contain solely *free* variables, the entire clause can be lifted out of the nested
321 : : * query and into the statement that *binds* the free variables.
322 : : *
323 : : * (5) If the predicate is correlated, we recursively analyze the predicate's expression (pre order). If we find an
324 : : * *n*-ary expression (e.g. binary or function application) that is correlated, we abort. The reasoning is that *n*-ary
325 : : * expressions cannot be broken apart into uncorrelated subexpression *while preserving query semantics*. An
326 : : * **important exception** to this rule are equi-predicates (i.e. `=`).
327 : : *
328 : : * (6) If the correlated expression is an equi-predicate of the form E1 = E2, recursively analyze E1 and E2. If E1 or E2
329 : : * are correlated, we abort (see (5)). If both E1 and E2 are uncorrelated, then one expression contains only bound
330 : : * variables while the other contains only free variables. We can then transform the entire statement to decorrelate
331 : : * the predicate.
332 : : *
333 : : * (7) To decorrelate the predicate, group the correlated subquery by the correlated subexpression of the equi-predicate
334 : : * (E1 or E2). Then, add the original predicate as a clause to the join condition of the dependent join between the
335 : : * statement that binds the free variables of the subexpression and the nested query that contains the predicate
336 : : * (potentially by multiple levels of nesting).
337 : : *
338 : : *
339 : : * DEPENDENT JOIN:
340 : : *
341 : : * Transforming non-equi predicates gives no benefit over a dependent join, unless we can perform *re*-aggregation (e.g.
342 : : * SUM of COUNTs).
343 : : *
344 : : *====================================================================================================================*/
345 : :
346 : : /** Given a `SelectStmt` \p stmt, extract the aggregates to compute while grouping. */
347 : : std::vector<std::reference_wrapper<const ast::FnApplicationExpr>>
348 : 190 : get_aggregates(const ast::SelectStmt &stmt)
349 : : {
350 : 190 : std::vector<std::reference_wrapper<const ast::FnApplicationExpr>> aggregates;
351 : : auto handle_fn = overloaded {
352 : 39 : [](auto&) { },
353 : 205 : [&aggregates](const ast::FnApplicationExpr &e) {
354 : 15 : M_insist(e.has_function());
355 [ + + ]: 15 : if (e.get_function().is_aggregate()) { // test that this is an aggregation
356 : : using std::find_if, std::to_string;
357 : 14 : const std::string str = to_string(e);
358 : 18 : auto exists = [&str](std::reference_wrapper<const ast::Expr> agg) {
359 : 4 : return to_string(agg.get()) == str;
360 : : };
361 [ + - + + ]: 14 : if (find_if(aggregates.begin(), aggregates.end(), exists) == aggregates.end()) // if not already present
362 [ - + ]: 13 : aggregates.push_back(e);
363 [ - + ]: 14 : throw visit_skip_subtree{}; // skip recursing into subexpressions
364 : 14 : }
365 : 15 : },
366 : : };
367 : :
368 [ + + ]: 190 : if (stmt.having)
369 [ + - + - ]: 5 : visit(handle_fn, *as<ast::HavingClause>(*stmt.having).having, m::tag<ast::ConstPreOrderExprVisitor>());
370 : :
371 [ + - + + ]: 215 : for (auto &s : as<ast::SelectClause>(*stmt.select).select)
372 [ + - ]: 25 : visit(handle_fn, *s.first, m::tag<ast::ConstPreOrderExprVisitor>());
373 : :
374 [ + + ]: 190 : if (stmt.order_by) {
375 [ + - + + ]: 16 : for (auto &o : as<ast::OrderByClause>(*stmt.order_by).order_by)
376 [ + - ]: 9 : visit(handle_fn, *o.first, m::tag<ast::ConstPreOrderExprVisitor>());
377 : 7 : }
378 : :
379 : 190 : return aggregates;
380 [ + - ]: 190 : }
381 : :
382 : : /** Computes whether the bound parts of \p expr are composable of elements in \p components. */
383 : 0 : bool is_composable_of(const ast::Expr &expr,
384 : : const std::vector<std::reference_wrapper<const ast::Expr>> components)
385 : : {
386 : 0 : auto recurse = overloaded {
387 : 0 : [&](const ast::Designator &d) -> bool {
388 [ # # ]: 0 : return d.contains_free_variables() or d.is_identifier(); // identifiers are implicitly composable, as they never refer into a table XXX do we have to check the target?
389 : : },
390 : 0 : [&](const ast::FnApplicationExpr &e) -> bool {
391 [ # # # # ]: 0 : if (not is_composable_of(*e.fn, components)) return false;
392 [ # # ]: 0 : for (auto &arg : e.args) {
393 [ # # # # ]: 0 : if (not is_composable_of(*arg, components))
394 : 0 : return false;
395 : : }
396 : 0 : return true;
397 : 0 : },
398 [ # # ]: 0 : [&](const ast::UnaryExpr &e) -> bool { return is_composable_of(*e.expr, components); },
399 : 0 : [&](const ast::BinaryExpr &e) -> bool {
400 [ # # # # : 0 : return is_composable_of(*e.lhs, components) and is_composable_of(*e.rhs, components);
# # # # #
# # # ]
401 : 0 : },
402 : 0 : [](auto&) -> bool { return true; },
403 : : };
404 : :
405 [ # # ]: 0 : for (auto c : components)
406 [ # # ]: 0 : if (expr == c.get()) return true; // syntactically equivalent to a component
407 : 0 : return visit(recurse, expr, m::tag<m::ast::ConstASTExprVisitor>()); // attempt to recursively compose expr
408 : 0 : }
409 : :
410 [ + - ]: 179 : GraphBuilder::GraphBuilder() : graph_(std::make_unique<QueryGraph>()) { }
411 : :
412 : : /** Collects correlation information of all `cnf::Clause`s occurring in the `QueryGraph`.
413 : : *
414 : : * `QueryCorrelationInfo` collects all tables referenced by free variables (i.e. correlated `Designator`s). It further
415 : : * collects all correlated subexpressions occuring in the `QueryGraph` and dissects them based on whether they occur in
416 : : * equi- or non-equi-predicates. */
417 : :
418 : 560 : ClauseInfo::ClauseInfo(const cnf::Clause &clause)
419 : : {
420 : : /*----- Compute whether the clause has bound or free variables, what tables are referenced, and which nested queries
421 : : * occur inside the clause. -----*/
422 : 280 : auto visitor = overloaded {
423 : 288 : [](auto&) -> void { },
424 : 822 : [this](const ast::Designator &D) {
425 : 542 : binding_depth = std::max(binding_depth, D.binding_depth());
426 [ + + ]: 542 : if (D.has_table_name())
427 [ + - ]: 537 : data_sources.emplace(D.get_table_name());
428 : 542 : },
429 : 280 : [this](const ast::QueryExpr &Q) {
430 : 0 : auto B = std::make_unique<GraphBuilder>();
431 [ # # ]: 0 : (*B)(*Q.query);
432 [ # # # # ]: 0 : nested_queries.emplace_back(Q, std::move(B), Q.alias());
433 [ # # # # ]: 0 : data_sources.emplace(Q.alias()); // by unnesting, this QueryExpr becomes a DataSource
434 : 0 : },
435 : : };
436 [ + + ]: 560 : for (const auto &pred : clause)
437 [ + - + - ]: 280 : visit(visitor, *pred, m::tag<ast::ConstPreOrderExprVisitor>());
438 : 280 : }
439 : :
440 : 267 : void GraphBuilder::process_selection(cnf::Clause &clause)
441 : : {
442 : 267 : ClauseInfo CI(clause);
443 : :
444 : : /*----- Introduce subqueries as sources. -----*/
445 [ - + ]: 267 : for (auto &subquery : CI.nested_queries) {
446 [ # # # # : 0 : auto &ref = graph_->add_source(subquery.alias, subquery.builder->get());
# # ]
447 [ # # ]: 0 : named_sources_.emplace(subquery.alias, ref);
448 [ # # ]: 0 : for (auto &[deferred_clause, deferred_CI] : subquery.builder->deferred_clauses_) {
449 [ # # ]: 0 : M_insist(deferred_CI.binding_depth > 0, "was this clause bound in its subquery?");
450 : 0 : --deferred_CI.binding_depth;
451 : : // TODO process all designators, redirecting free designators when they are bound; this would enable us to
452 : : // lift correlated claues w/o having to introduce new designators that replace parts of the AST
453 [ # # ]: 0 : if (deferred_CI.binding_depth == 0)
454 [ # # # # ]: 0 : bound_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI)); // it's now bound
455 : : else
456 [ # # # # ]: 0 : deferred_clauses_.try_emplace(std::move(deferred_clause), std::move(deferred_CI)); // still not bound, deferr further
457 : : }
458 : : }
459 : :
460 [ - + ]: 267 : if (not needs_grouping_) { // no grouping ⇒ all clauses can trivially be decorrelated
461 [ + - ]: 267 : if (CI.binding_depth == 0)
462 [ + - ]: 267 : bound_clauses_.try_emplace(clause, std::move(CI));
463 : : else
464 [ # # ]: 0 : deferred_clauses_.try_emplace(clause, std::move(CI));
465 : 267 : return;
466 : : }
467 : :
468 : : /* A clause with a single predicate can be an equi-clause. If it is an equi-clause, it can be decorrelated more
469 : : * cleverly by introducing additional grouping keys. */
470 [ # # # # ]: 0 : if (CI.binding_depth != 0 and clause.size() == 1) {
471 : 0 : auto pred = clause[0];
472 : :
473 : : /*----- `pred` is correlated: `pred` contains both bound and free variables -----*/
474 [ # # # # ]: 0 : auto binary = cast<const ast::BinaryExpr>(&pred.expr());
475 [ # # ]: 0 : if (not binary) // not binary expression ⇒ no equi-clause
476 : 0 : goto fallback;
477 : :
478 [ # # # # ]: 0 : if (binary->op().type != m::TK_EQUAL) // no equi-predicate ⇒ no equi-clause
479 : 0 : goto fallback;
480 : :
481 : : /*----- The clause contains a single equi-predicate. Check whether it can be decorrelated. -----*/
482 [ # # ]: 0 : bool lhs_has_free_variables = binary->lhs->contains_free_variables();
483 [ # # ]: 0 : bool rhs_has_free_variables = binary->rhs->contains_free_variables();
484 [ # # # # : 0 : if (lhs_has_free_variables and binary->lhs->contains_bound_variables()) // mixed LHS
# # ]
485 : 0 : goto fallback;
486 [ # # # # : 0 : if (rhs_has_free_variables and binary->rhs->contains_bound_variables()) // mixed RHS
# # ]
487 : 0 : goto fallback;
488 [ # # ]: 0 : M_insist(lhs_has_free_variables != rhs_has_free_variables, "only either side may contain free variables");
489 : :
490 : : /*----- The clause contains a single equi-predicate, that can be decorrelated. -----*/
491 [ # # ]: 0 : deferred_clauses_.try_emplace(clause, std::move(CI));
492 : : // auto &expr_with_free_vars = lhs_has_free_variables ? *binary->lhs : *binary->rhs;
493 [ # # ]: 0 : auto &expr_with_bound_vars = lhs_has_free_variables ? *binary->rhs : *binary->lhs;
494 : :
495 : : /*----- Ensure that the *bound* part of the equi-predicate is comosable of the grouping keys. -----*/
496 [ # # # # : 0 : if (not is_composable_of(expr_with_bound_vars, existing_grouping_keys_)) // is bound part composable?
# # ]
497 [ # # ]: 0 : additional_grouping_keys_.emplace(expr_with_bound_vars); // no ⇒ introduce new grouping key
498 : 0 : return;
499 : : }
500 : :
501 : : fallback:
502 : : /*----- Clauses with more than one predicate are *always* non-equi clauses and must be analyzed accordingly. -----*/
503 : : /* This is *not* an equi-clause, since it has more than one predicate joined by disjunction.
504 : : * Check whether the clause is correlated, i.e. whether it contains both free and bound variables. Note, that
505 : : * it is not enough to check whether the predicates of this clause are correlated, as a predicate of only bound
506 : : * variables together with a predicate of only free variables renders the clause correlated.
507 : : *
508 : : * If the clause is correlated, check whether *all* bound variables are embedded in the grouping keys. If this
509 : : * is the case, we can *trivially* decorrelate the clause. Otherwise, decorrelation is not possible and we
510 : : * resort to dependent join. */
511 : :
512 : : /*----- Check whether the clause is correlated. -----*/
513 [ # # ]: 0 : if (CI.binding_depth == 0) {
514 [ # # ]: 0 : bound_clauses_.try_emplace(clause, std::move(CI));
515 : 0 : } else {
516 : : /* The clause contains free variables. Check whether the predicates are composable of the grouping keys. */
517 [ # # ]: 0 : for (auto pred : clause) {
518 [ # # # # : 0 : if (not is_composable_of(pred.expr(), existing_grouping_keys_)) {
# # # # ]
519 : : // TODO dependent join
520 [ # # # # : 0 : throw m::invalid_argument("the bound parts of a correlated clause cannot be composed of the "
# # ]
521 : : "grouping keys");
522 : : }
523 : : }
524 : : /* All predicates of this clause are composable of the grouping keys. Therefore, the entire clause can
525 : : * trivially be decorrelated. */
526 [ # # ]: 0 : deferred_clauses_.try_emplace(clause, std::move(CI));
527 : : }
528 : 267 : }
529 : :
530 : :
531 : :
532 : :
533 : :
534 : :
535 : :
536 : :
537 : :
538 : :
539 : :
540 : :
541 : :
542 : :
543 : :
544 : :
545 : :
546 : :
547 : :
548 : :
549 : :
550 : :
551 : :
552 : :
553 : : /** Returns `true` iff both designators has the same textual representation. */
554 : 0 : bool equal(const ast::Designator &one, const ast::Designator &two) {
555 [ # # # # ]: 0 : return streq(to_string(one).c_str(), to_string(two).c_str());
556 : 0 : }
557 : :
558 : : /** Like `std::vector::emplace_back()` but adds only iff `pair` is not already contained in `pairs`. */
559 : 0 : void emplace_back_unique(std::vector<std::pair<const ast::Expr*, PooledOptionalString>> &pairs,
560 : : const std::pair<const ast::Designator*, PooledOptionalString> &pair)
561 : : {
562 [ # # ]: 0 : for (auto p : pairs) {
563 [ # # # # : 0 : if (auto d = cast<const ast::Designator>(p.first); d and equal(*d, *pair.first))
# # # # ]
564 : 0 : return;
565 [ # # # ]: 0 : }
566 : 0 : pairs.emplace_back(pair);
567 : 0 : }
568 : :
569 : : /** Like `std::vector::emplace_back()` but adds only iff `pair` is not already contained in `pairs`. */
570 : 0 : void emplace_back_unique(std::vector<std::pair<const ast::Expr*, PooledOptionalString>> &pairs,
571 : : const std::pair<const ast::Expr*, PooledOptionalString> &pair)
572 : : {
573 [ # # ]: 0 : if (auto d = cast<const ast::Designator>(pair.first))
574 [ # # ]: 0 : emplace_back_unique(pairs, std::pair<const ast::Designator*, PooledOptionalString>(d, pair.second));
575 [ # # ]: 0 : else if (not contains(pairs, pair))
576 : 0 : pairs.emplace_back(pair);
577 : 0 : }
578 : :
579 : : /** Like `std::vector::emplace_back()` but adds only iff `des` is not already contained in `exprs`. */
580 : 0 : void emplace_back(std::vector<const ast::Expr*> &exprs, const ast::Designator *des)
581 : : {
582 [ # # ]: 0 : for (auto e : exprs) {
583 [ # # # # ]: 0 : if (auto d = cast<const ast::Designator>(e); d and equal(*d, *des))
584 : 0 : return;
585 : : }
586 : 0 : exprs.emplace_back(des);
587 : 0 : }
588 : :
589 : : /** Like `std::vector::emplace_back()` but adds only iff `expr` is not already contained in `exprs`. */
590 : 0 : void emplace_back_unique(std::vector<const ast::Expr*> &exprs, const ast::Expr *expr)
591 : : {
592 [ # # ]: 0 : if (auto d = cast<const ast::Designator>(expr))
593 : 0 : emplace_back(exprs, d);
594 [ # # ]: 0 : else if (not contains(exprs, expr))
595 : 0 : exprs.emplace_back(expr);
596 : 0 : }
597 : :
598 : : /** Like `std::vector::emplace_back()` but adds only iff `src` is not already contained in `sources`. */
599 : 0 : void emplace_back_unique(std::vector<DataSource*> &sources, DataSource *src)
600 : : {
601 [ # # ]: 0 : for (auto s : sources) {
602 [ # # # # : 0 : if (s->name() == src->name())
# # ]
603 : 0 : return;
604 : : }
605 : 0 : sources.emplace_back(src);
606 : 0 : }
607 : :
608 : : /** Like `std::vector::insert()` but adds only those elements of `insertions` which are not already contained in
609 : : * `exprs`. */
610 : 0 : void insert(std::vector<const ast::Expr*> &exprs, const std::vector<const ast::Designator*> &insertions) {
611 [ # # ]: 0 : for (auto d : insertions)
612 : 0 : emplace_back(exprs, d);
613 : 0 : }
614 : :
615 : : /** Like `std::vector::insert()` but adds only those elements of `insertions` which are not already contained in
616 : : * `exprs`. */
617 : 0 : void insert(std::vector<const ast::Expr*> &exprs, const std::vector<const ast::Expr*> &insertions) {
618 [ # # ]: 0 : for (auto e : insertions) {
619 [ # # ]: 0 : if (auto d = cast<const ast::Designator>(e))
620 : 0 : emplace_back(exprs, d);
621 [ # # ]: 0 : else if (not contains(exprs, e))
622 : 0 : exprs.emplace_back(e);
623 : : }
624 : 0 : }
625 : :
626 : : /** Helper structure to extract the `QueryExpr`s in an expression. */
627 : : struct GetNestedQueries : ast::ConstASTExprVisitor
628 : : {
629 : : private:
630 : : std::vector<const ast::QueryExpr*> queries_;
631 : :
632 : : public:
633 : : GetNestedQueries() { }
634 : :
635 : : std::vector<const ast::QueryExpr*> get() { return std::move(queries_); }
636 : :
637 : : using ConstASTExprVisitor::operator();
638 : : void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
639 : :
640 : : void operator()(Const<ast::Designator>&) override { /* nothing to be done */ }
641 : :
642 : : void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
643 : :
644 : : void operator()(Const<ast::FnApplicationExpr>&) override { /* nothing to be done */ }
645 : :
646 : : void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
647 : :
648 : : void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
649 : :
650 : : void operator()(Const<ast::QueryExpr> &e) override { queries_.push_back(&e); }
651 : : };
652 : :
653 : : /** Helper structure to compute and provide primary keys. */
654 : : #if 0
655 : : struct m::GetPrimaryKey
656 : : {
657 : : private:
658 : : std::vector<const ast::Expr*> primary_keys_; ///< a list of all primary keys
659 : :
660 : : public:
661 : : GetPrimaryKey() { }
662 : :
663 : : auto get() { return std::move(primary_keys_); }
664 : :
665 : : void compute(DataSource *source) {
666 : : if (auto base = cast<BaseTable>(source)) {
667 : : Catalog &C = Catalog::Get();
668 : : Position pos(C.pool("Decorrelation"));
669 : : ast::Token dot(pos, C.pool("."), TK_DOT);
670 : : ast::Token tbl(pos, base->name(), TK_IDENTIFIER);
671 : : for (auto pkey : base->table().primary_key()) {
672 : : ast::Token attr(pos, pkey->name, TK_IDENTIFIER);
673 : : auto D = new ast::Designator(dot, tbl, attr, pkey->type, pkey);
674 : : primary_keys_.push_back(D);
675 : : // base->expansion_.push_back(D);
676 : : }
677 : : } else if (auto query = cast<Query>(source)) {
678 : : auto &graph = query->query_graph();
679 : : auto former_size = primary_keys_.size();
680 : : if (graph.grouping()) {
681 : : /* Grouping keys form a new primary key. */
682 : : for (auto e : graph.group_by()) {
683 : : Catalog &C = Catalog::Get();
684 : : Position pos(C.pool("Decorrelation"));
685 : : ast::Token dot(pos, C.pool("."), TK_DOT);
686 : : ast::Token tbl(pos, query->name(), TK_IDENTIFIER);
687 : : std::ostringstream oss;
688 : : oss << e;
689 : : ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
690 : : auto D = new ast::Designator(dot, tbl, attr, as<const PrimitiveType>(e.get().type())->as_scalar(), e);
691 : : primary_keys_.push_back(D);
692 : : }
693 : : } else {
694 : : /* Primary keys of all sources for the primary key. */
695 : : for (auto ds : graph.sources())
696 : : compute(ds);
697 : : }
698 : : /* Provide all newly inserted primary keys. */
699 : : if (not graph.projections().empty()) {
700 : : while (former_size != primary_keys_.size()) {
701 : : emplace_back_unique(graph.projections_,
702 : : std::pair<const ast::Expr *, const char *>(primary_keys_.at(former_size), nullptr));
703 : : ++former_size;
704 : : }
705 : : }
706 : : } else {
707 : : M_unreachable("invalid variant");
708 : : }
709 : : }
710 : : };
711 : : #endif
712 : :
713 : : /** Compute and provide all primary keys of `source`. */
714 : 0 : auto get_primary_key(DataSource *source) {
715 : : // GetPrimaryKey GPK; // TODO we could write this much denser if we had a recursive Expr visitor
716 : : // GPK.compute(source);
717 : : // return GPK.get();
718 : 0 : }
719 : :
720 : : /** Helper structure to extract the `Designator`s (and `FnApplicationExpr`s) in an expression or a data source. */
721 : : #if 0
722 : : struct GetDesignators : ast::ConstASTExprVisitor
723 : : {
724 : : private:
725 : : std::vector<const ast::Expr*> designators_; ///< vector of all designators (and `FnApplicationExpr`s)
726 : : bool aggregates_ = true; ///< indicates whether `FnApplicationExpr`s are considered or only their arguments
727 : :
728 : : public:
729 : : GetDesignators() { }
730 : :
731 : : std::vector<const ast::Expr*> get() { return std::move(designators_); }
732 : :
733 : : void aggregates(bool aggregates) { aggregates_ = aggregates; }
734 : :
735 : : using ConstASTExprVisitor::operator();
736 : : void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
737 : :
738 : : void operator()(Const<ast::Designator> &e) override { designators_.push_back(&e); }
739 : :
740 : : void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
741 : :
742 : : void operator()(Const<ast::FnApplicationExpr> &e) override {
743 : : if (aggregates_)
744 : : designators_.push_back(&e);
745 : : else {
746 : : for (auto &arg : e.args)
747 : : (*this)(*arg);
748 : : }
749 : : }
750 : :
751 : : void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
752 : :
753 : : void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
754 : :
755 : : void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
756 : : };
757 : : #endif
758 : :
759 : : /** Given a query graph and a query within the graph, compute all needed attributes by `graph` and `query`, i.e. those
760 : : * in the filter and joins of `query` plus those in the grouping (or the projection if no grouping exists). */
761 : : #if 0
762 : : auto get_needed_attrs(const QueryGraph *graph, const Query &query)
763 : : {
764 : : M_insist(contains(graph->sources(), &query));
765 : :
766 : : GetDesignators GD; // TODO we could write this much denser if we had a recursive Expr visitor
767 : : auto it = std::find(graph->sources().begin(), graph->sources().end(), &query);
768 : : for (auto &c : (*it)->filter()) {
769 : : for (auto &p : c)
770 : : GD(*p.expr());
771 : : }
772 : : for (auto &j : (*it)->joins()) {
773 : : for (auto &c : j->condition()) {
774 : : for (auto &p : c)
775 : : GD(*p.expr());
776 : : }
777 : : }
778 : : if (graph->grouping()) {
779 : : for (auto &e : graph->group_by())
780 : : GD(*e);
781 : : GD.aggregates(false); // to search for needed attributes in the arguments of aggregates
782 : : for (auto &e : graph->aggregates())
783 : : GD(*e);
784 : : } else {
785 : : for (auto &e : graph->projections())
786 : : GD(*e.first);
787 : : }
788 : : return GD.get();
789 : : }
790 : : #endif
791 : :
792 : : /** Helper structure to replace designators in an `Expr`. */
793 : : #if 0
794 : : struct ReplaceDesignators : ast::ConstASTExprVisitor
795 : : {
796 : : private:
797 : : const std::vector<const ast::Expr*> *origin_; ///< list of all reachable expressions
798 : : ast::Designator *new_designator_ = nullptr; ///< the newly generated designator to use, `nullptr` if nothing to replace
799 : : std::vector<const ast::Expr*> deletions_; ///< all removed expressions which have to be deleted manually
800 : :
801 : : public:
802 : : ReplaceDesignators() { }
803 : :
804 : : /** Replaces all designators all predicates of `cnf` by new ones pointing to the respective designator in `origin`.
805 : : * Returns all replaced designators.
806 : : * IMPORTANT: Replaced designators are neither deleted by this structure nor by `~Expr()` anymore;
807 : : * caller must delete them!!! */
808 : : std::vector<const ast::Expr*> replace(cnf::CNF &cnf, const std::vector<const ast::Expr*> *origin,
809 : : bool target_deleted = false)
810 : : {
811 : : origin_ = origin;
812 : : deletions_.clear();
813 : : cnf::CNF cnf_new;
814 : : for (auto &c : cnf) {
815 : : cnf::Clause c_new;
816 : : for (auto &p : c) {
817 : : auto e = p.expr();
818 : : (*this)(*e);
819 : : if (cast<const ast::Designator>(e) and new_designator_) {
820 : : /* `e` has to be replaced by `new_designator_`. `e` has to be deleted manually iff the target is
821 : : * deleted (since it is removed from `target`) and `new_designator` has to be deleted manually iff the
822 : : * target is not deleted. */
823 : : c_new = c_new or cnf::Clause({cnf::Predicate::Create(new_designator_, p.negative())});
824 : : if (target_deleted)
825 : : deletions_.push_back(e);
826 : : else
827 : : deletions_.push_back(new_designator_);
828 : : new_designator_ = nullptr;
829 : : } else {
830 : : c_new = c_new or cnf::Clause({p});
831 : : }
832 : : }
833 : : cnf_new = cnf_new and cnf::CNF({c_new});
834 : : }
835 : : cnf = std::move(cnf_new);
836 : : return std::move(deletions_);
837 : : }
838 : :
839 : : /** Replaces all designators in `target` by new ones pointing to the respective designator in `origin`.
840 : : * Returns all replaced designators. `target_deleted` indicates whether `target` is deleted by another structure.
841 : : * IMPORTANT: Replaced designators are neither deleted by this structure nor by `~Expr()` anymore;
842 : : * caller must delete them!!! */
843 : : std::vector<const ast::Expr*> replace(std::vector<const ast::Expr*> &target, const std::vector<const ast::Expr*> *origin,
844 : : bool target_deleted = false) {
845 : : origin_ = origin;
846 : : deletions_.clear();
847 : : std::vector<const ast::Expr*> target_new;
848 : : for (auto e : target) {
849 : : (*this)(*e);
850 : : if (cast<const ast::Designator>(e) and new_designator_) {
851 : : /* `e` has to be replaced by `new_designator_`. `e` has to be deleted manually iff the target is
852 : : * deleted (since it is removed from `target`) and `new_designator` has to be deleted manually iff the
853 : : * target is not deleted. */
854 : : target_new.emplace_back(new_designator_);
855 : : if (target_deleted)
856 : : deletions_.push_back(e);
857 : : else
858 : : deletions_.push_back(new_designator_);
859 : : new_designator_ = nullptr;
860 : : } else {
861 : : target_new.emplace_back(e);
862 : : }
863 : : }
864 : : target = std::move(target_new);
865 : : return std::move(deletions_);
866 : : }
867 : :
868 : : /** Replaces all designators in `target.first` by new ones pointing to the respective designator in `origin`.
869 : : * Returns all replaced designators.
870 : : * IMPORTANT: Replaced designators are neither deleted by this structure nor by `~Expr()` anymore;
871 : : * caller must delete them!!! */
872 : : std::vector<const ast::Expr*> replace(std::vector<std::pair<const ast::Expr*, const char*>> &target,
873 : : const std::vector<const ast::Expr*> *origin, bool target_deleted = false)
874 : : {
875 : : origin_ = origin;
876 : : deletions_.clear();
877 : : std::vector<std::pair<const ast::Expr*, const char*>> target_new;
878 : : for (auto e : target) {
879 : : (*this)(*e.first);
880 : : if (cast<const ast::Designator>(e.first) and new_designator_) {
881 : : /* `e.first` has to be replaced by `new_designator_`. `e.first` has to be deleted manually iff the
882 : : * target is deleted (since it is removed from `target`) and `new_designator` has to be deleted manually
883 : : * iff the target is not deleted. */
884 : : target_new.emplace_back(new_designator_, e.second);
885 : : if (target_deleted)
886 : : deletions_.push_back(e.first);
887 : : else
888 : : deletions_.push_back(new_designator_);
889 : : new_designator_ = nullptr;
890 : : } else {
891 : : target_new.emplace_back(e);
892 : : }
893 : : }
894 : : target = std::move(target_new);
895 : : return std::move(deletions_);
896 : : }
897 : :
898 : : private:
899 : : using ConstASTExprVisitor::operator();
900 : : void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
901 : :
902 : : void operator()(Const<ast::Designator> &e) override {
903 : : if (auto d = findDesignator(e))
904 : : new_designator_ = make_designator(d);
905 : : }
906 : :
907 : : void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
908 : :
909 : : void operator()(Const<ast::FnApplicationExpr> &e) override {
910 : : std::vector<std::unique_ptr<ast::Expr>> args_new;
911 : : for (auto &arg : e.args) {
912 : : (*this)(*arg);
913 : : if (new_designator_) {
914 : : /* Replace `arg` by the newly generated designator. */
915 : : deletions_.push_back(arg.get());
916 : : args_new.emplace_back(new_designator_);
917 : : new_designator_ = nullptr;
918 : : } else {
919 : : args_new.emplace_back(arg.get());
920 : : }
921 : : }
922 : : const_cast<ast::FnApplicationExpr*>(&e)->args = std::move(args_new);
923 : : }
924 : :
925 : : void operator()(Const<ast::UnaryExpr> &e) override {
926 : : (*this)(*e.expr);
927 : : if (new_designator_) {
928 : : /* Replace `expr` by the newly generated designator. */
929 : : deletions_.push_back(e.expr.get());
930 : : const_cast<ast::UnaryExpr*>(&e)->expr = std::unique_ptr<ast::Designator>(new_designator_);
931 : : new_designator_ = nullptr;
932 : : }
933 : : }
934 : :
935 : : void operator()(Const<ast::BinaryExpr> &e) override {
936 : : (*this)(*e.lhs);
937 : : if (new_designator_) {
938 : : /* Replace `expr` by the newly generated designator. */
939 : : deletions_.push_back(e.lhs.get());
940 : : const_cast<ast::BinaryExpr*>(&e)->lhs = std::unique_ptr<ast::Designator>(new_designator_);
941 : : new_designator_ = nullptr;
942 : : }
943 : : (*this)(*e.rhs);
944 : : if (new_designator_) {
945 : : /* Replace `expr` by the newly generated designator. */
946 : : deletions_.push_back(e.rhs.get());
947 : : const_cast<ast::BinaryExpr*>(&e)->rhs = std::unique_ptr<ast::Designator>(new_designator_);
948 : : new_designator_ = nullptr;
949 : : }
950 : : }
951 : :
952 : : void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
953 : :
954 : : /** Returns a `Designator` equal to `des` iff it exists in `origin_`, and `nullptr` otherwise. */
955 : : const ast::Designator * findDesignator(const ast::Designator &des) {
956 : : for (auto e : *origin_) {
957 : : if (auto d = cast<const ast::Designator>(e)) {
958 : : if (equal(*d, des))
959 : : return d;
960 : : }
961 : : }
962 : : return nullptr;
963 : : }
964 : :
965 : : /** Returns a designator with an empty table name, the textual representation of `d` as attribute
966 : : * name, the same type as `d`, and `d` as target. */
967 : : ast::Designator * make_designator(const ast::Designator *d) {
968 : : Catalog &C = Catalog::Get();
969 : : Position pos(C.pool("Decorrelation"));
970 : : ast::Token dot(pos, C.pool("."), TK_DOT);
971 : : std::ostringstream oss;
972 : : oss << *d;
973 : : ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
974 : : auto D = new ast::Designator(dot, ast::Token(), attr, as<const PrimitiveType>(d->type())->as_scalar(), d);
975 : : return D;
976 : : }
977 : : };
978 : : #endif
979 : :
980 : : /** Helper structure to extract correlation information of a `cnf:CNF` formula. */
981 : : #if 0
982 : : struct m::GetCorrelationInfo : ast::ConstASTExprVisitor
983 : : {
984 : : friend struct Decorrelation;
985 : :
986 : : /** Holds correlation information of a single `cnf::Clause`. */
987 : : struct CorrelationInfo : ast::ASTExprVisitor
988 : : {
989 : : cnf::Clause clause; ///< the `cnf::Clause` the information is about
990 : : std::set<const ast::Expr*> uncorrelated_exprs; ///< a set of all uncorrelated designators and aggregates in the `clause`
991 : : std::set<const char*> sources; ///< a set of all sources of correlated designators occurring in the `clause`
992 : : private:
993 : : const char *table_name_; ///< the new table name used in `replace()`
994 : : std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_outer_; ///< maps old designators to new ones generated by the caller
995 : : std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new_inner_; ///< maps old designators to new ones generated by `replace()`
996 : : std::unordered_set<const ast::Designator*> replaced_designators_; ///< all replaced designators which have to be deleted
997 : : bool is_decorrelation_finished_ = false; ///< indicates whether the decorrelation process is finished, used to remove the decorrelation flag of designators
998 : : // TODO move the post-decorrelation processing into a separate class
999 : :
1000 : : public:
1001 : : CorrelationInfo(cnf::Clause clause) : clause(std::move(clause)) { }
1002 : :
1003 : : /** Updates `uncorrelated_exprs` according to `old_to_new` and replaces all table names of all designators in
1004 : : * this list by `table_name`. Apply all these changes to the `clause`, too.
1005 : : * Since replaced designators are neither deleted by this structure nor by `~Expr()` anymore, add them to
1006 : : * the given expansion list for later deletion. */
1007 : : void replace(std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> &old_to_new,
1008 : : const char *table_name, std::vector<const ast::Expr*> &expansion) {
1009 : : table_name_ = table_name;
1010 : : old_to_new_outer_ = std::move(old_to_new);
1011 : : /* Update `clause`. */
1012 : : for (auto pred : clause)
1013 : : (*this)(*pred);
1014 : : /* Update `uncorrelated_exprs`. */
1015 : : std::set<const ast::Expr*> uncorrelated_exprs_new;
1016 : : for (auto e : uncorrelated_exprs) {
1017 : : if (auto d = cast<const ast::Designator>(e)) {
1018 : : auto d_new = get_new(d);
1019 : : try {
1020 : : uncorrelated_exprs_new.emplace(old_to_new_inner_.at(d_new));
1021 : : /* Occurrence of `d` was possibly replaced by newly created designator and not by `d_new`.
1022 : : * Therefore, add `d_new` to `replaced_designators` to ensure deletion by caller. */
1023 : : replaced_designators_.emplace(d_new);
1024 : : } catch (std::out_of_range&) {
1025 : : /* `d_new` was not found in `old_to_new_inner` since no table name was specified and no new
1026 : : * designator was created by this function. Add `d_new` as new uncorrelated expression. */
1027 : : M_insist(not table_name_);
1028 : : uncorrelated_exprs_new.emplace(d_new);
1029 : : }
1030 : : } else {
1031 : : uncorrelated_exprs_new.emplace(e);
1032 : : }
1033 : : }
1034 : : uncorrelated_exprs = std::move(uncorrelated_exprs_new);
1035 : : old_to_new_inner_.clear();
1036 : : /* Add replaced designators to expansion list. */
1037 : : for (auto d : replaced_designators_)
1038 : : expansion.push_back(d);
1039 : : replaced_designators_.clear();
1040 : : }
1041 : :
1042 : : /** Remove decorrelation flag of all occurring designators in `clause`. */
1043 : : void finish_decorrelation() {
1044 : : is_decorrelation_finished_ = true;
1045 : : for (auto &p : clause)
1046 : : (*this)(*p);
1047 : : }
1048 : :
1049 : : private:
1050 : : using ASTExprVisitor::operator();
1051 : : void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
1052 : :
1053 : : void operator()(Const<ast::Designator> &d) override { if (is_decorrelation_finished_) d.set_bound(); }
1054 : :
1055 : : void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
1056 : :
1057 : : void operator()(Const<ast::FnApplicationExpr>&) override { /* nothing to be done */ }
1058 : :
1059 : : void operator()(Const<ast::UnaryExpr> &e) override {
1060 : : if (is_decorrelation_finished_) {
1061 : : (*this)(*e.expr);
1062 : : } else {
1063 : : if (auto d = cast<ast::Designator>(e.expr.get()); d and contains(uncorrelated_exprs, d)) {
1064 : : /* Replace this designator by an equivalent new one. Set table name iff present. */
1065 : : e.expr = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1066 : : replaced_designators_.emplace(d);
1067 : : } else {
1068 : : (*this)(*e.expr);
1069 : : }
1070 : : }
1071 : : }
1072 : :
1073 : : void operator()(Const<ast::BinaryExpr> &e) override {
1074 : : if (is_decorrelation_finished_) {
1075 : : (*this)(*e.lhs);
1076 : : (*this)(*e.rhs);
1077 : : } else {
1078 : : if (auto d = cast<ast::Designator>(e.lhs.get()); d and contains(uncorrelated_exprs, d)) {
1079 : : /* Replace this designator by an equivalent new one. Set table name iff present. */
1080 : : e.lhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1081 : : replaced_designators_.emplace(d);
1082 : : } else {
1083 : : (*this)(*e.lhs);
1084 : : }
1085 : : if (auto d = cast<ast::Designator>(e.rhs.get()); d and contains(uncorrelated_exprs, d)) {
1086 : : /* Replace this designator by an equivalent new one. Set table name iff present. */
1087 : : e.rhs = std::unique_ptr<ast::Expr>(table_name_ ? set_table_name(get_new(d)) : get_new(d));
1088 : : replaced_designators_.emplace(d);
1089 : : } else {
1090 : : (*this)(*e.rhs);
1091 : : }
1092 : : }
1093 : : }
1094 : :
1095 : : void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
1096 : :
1097 : : ast::Designator * get_new(ast::Designator *d) {
1098 : : if (auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1099 : : return it->second;
1100 : : else
1101 : : return d;
1102 : : }
1103 : : const ast::Designator * get_new(const ast::Designator *d) {
1104 : : if (auto it = old_to_new_outer_.find(d); it != old_to_new_outer_.end())
1105 : : return it->second;
1106 : : else
1107 : : return d;
1108 : : }
1109 : :
1110 : : ast::Designator * set_table_name(const ast::Designator *d) {
1111 : : Catalog &C = Catalog::Get();
1112 : : Position pos(C.pool("Decorrelation"));
1113 : : ast::Token dot(pos, C.pool("."), TK_DOT);
1114 : : ast::Token tbl(pos, C.pool(table_name_), TK_IDENTIFIER);
1115 : : auto D = new ast::Designator(dot, tbl, d->attr_name, d->type(), d);
1116 : : old_to_new_inner_.emplace(d, D);
1117 : : return D;
1118 : : }
1119 : : };
1120 : :
1121 : : private:
1122 : : std::vector<CorrelationInfo*> equi_; ///< a list of all correlation information of equi-predicates
1123 : : std::vector<CorrelationInfo*> non_equi_; ///< a list of all correlation information of non-equi-predicates
1124 : :
1125 : : CorrelationInfo *info_; ///< the current `CorrelationInfo` object
1126 : :
1127 : : std::vector<const ast::Expr*> expansion_; ///< list of designators expanded by decorrelation steps
1128 : :
1129 : : public:
1130 : : GetCorrelationInfo() { }
1131 : : ~GetCorrelationInfo() {
1132 : : for (auto d : expansion_)
1133 : : delete d;
1134 : : }
1135 : :
1136 : : /** Given a cnf::CNF formula, compute all correlation information of this formula.
1137 : : * Return the remaining `cnf::CNF` which only consists of uncorrelated predicates. */
1138 : : cnf::CNF compute(const cnf::CNF &cnf) {
1139 : : cnf::CNF res;
1140 : : for (auto &c : cnf) { // iterate over a single clause (de facto)
1141 : : info_ = new CorrelationInfo(c); // store reference to the *currently built* CorrelationInfo
1142 : : bool is_equi = c.size() <= 1; // the clause can only be an equi-predicate if there is no disjunction
1143 : : for (auto &p : c) { // fill `info_` for each predicate of clause `c`
1144 : : (*this)(*p); // visitor GetCorrelationInfo(p), fills the current `info_` with data
1145 : : if (p.negative())
1146 : : is_equi = is_equi and is_not_equal(&p.expr());
1147 : : else
1148 : : is_equi = is_equi and is_equal(&p.expr());
1149 : : }
1150 : : if (info_->sources.empty()) {
1151 : : /* c is not correlated. */
1152 : : res = res and cnf::CNF({c});
1153 : : delete info_;
1154 : : } else {
1155 : : /* c is correlated. */
1156 : : if (is_equi)
1157 : : equi_.push_back(info_);
1158 : : else
1159 : : non_equi_.push_back(info_);
1160 : : }
1161 : : }
1162 : : return res;
1163 : : }
1164 : :
1165 : : /** Returns a list of all correlation information of equi-predicates. */
1166 : : std::vector<CorrelationInfo*> getEqui() { return std::move(equi_); }
1167 : : /** Returns a list of all correlation information of non-equi-predicates. */
1168 : : std::vector<CorrelationInfo*> getNonEqui() { return std::move(non_equi_); }
1169 : :
1170 : : bool empty() { return equi_.empty() and non_equi_.empty(); }
1171 : : bool non_equi() { return not non_equi_.empty(); }
1172 : :
1173 : : private:
1174 : : using ConstASTExprVisitor::operator();
1175 : : void operator()(Const<ast::ErrorExpr>&) override { M_unreachable("graph must not contain errors"); }
1176 : :
1177 : : void operator()(Const<ast::Designator> &e) override {
1178 : : if (e.contains_free_variables()) {
1179 : : auto names = find_table_name(e);
1180 : : info_->sources.insert(names.begin(), names.end());
1181 : : } else {
1182 : : info_->uncorrelated_exprs.emplace(&e);
1183 : : }
1184 : : }
1185 : :
1186 : : void operator()(Const<ast::Constant>&) override { /* nothing to be done */ }
1187 : :
1188 : : void operator()(Const<ast::FnApplicationExpr> &e) override { info_->uncorrelated_exprs.emplace(&e); }
1189 : :
1190 : : void operator()(Const<ast::UnaryExpr> &e) override { (*this)(*e.expr); }
1191 : :
1192 : : void operator()(Const<ast::BinaryExpr> &e) override { (*this)(*e.lhs); (*this)(*e.rhs); }
1193 : :
1194 : : void operator()(Const<ast::QueryExpr>&) override { /* nothing to be done */ }
1195 : :
1196 : : bool is_equal(const ast::Expr *e) {
1197 : : if (auto b = cast<const ast::BinaryExpr>(e))
1198 : : return b->op() == TK_EQUAL;
1199 : : else
1200 : : return true;
1201 : : }
1202 : :
1203 : : bool is_not_equal(const ast::Expr *e) {
1204 : : if (auto b = cast<const ast::BinaryExpr>(e))
1205 : : return b->op() == TK_BANG_EQUAL;
1206 : : else
1207 : : return true;
1208 : : }
1209 : :
1210 : : std::set<const char*> find_table_name(const ast::Designator &d) {
1211 : : if (d.has_table_name()) return {d.get_table_name()};
1212 : : if (std::holds_alternative<const Attribute*>(d.target())) {
1213 : : return {std::get<const Attribute *>(d.target())->table.name};
1214 : : } else if (std::holds_alternative<const ast::Expr*>(d.target())) {
1215 : : GetDesignators GD;
1216 : : GD(*std::get<const ast::Expr*>(d.target()));
1217 : : std::set<const char*> res;
1218 : : for (auto des : GD.get()) {
1219 : : /* Skip `FnApplicationExpr`s because they have no table name. */
1220 : : if (auto d = cast<const ast::Designator>(des)) {
1221 : : auto names = find_table_name(*d);
1222 : : res.insert(names.begin(), names.end());
1223 : : }
1224 : : }
1225 : : return res;
1226 : : } else {
1227 : : M_unreachable("target can not be `std::monostate`");
1228 : : }
1229 : : }
1230 : : };
1231 : : #endif
1232 : :
1233 : : /** The method that decorrelates a `Query` in a `QueryGraph`. */
1234 : : #if 0
1235 : : struct m::Decorrelation
1236 : : {
1237 : : private:
1238 : : ///> `first` is the query graph of query `second`
1239 : : using graphs_t = std::pair<QueryGraph*, Query*>;
1240 : :
1241 : : ///> the query to decorrelate, that is a `DataSource` of `graph_`
1242 : : Query &query_;
1243 : : ///> the query graph containing `query_` as `DataSource`
1244 : : QueryGraph *graph_;
1245 : :
1246 : : std::vector<graphs_t> correlated_nested_queries_; ///< a list of nested queries with correlated predicates
1247 : : std::vector<const ast::Expr*> needed_attrs_; ///< a list of all needed attributes of `graph_`
1248 : :
1249 : : public:
1250 : : Decorrelation(Query &query, QueryGraph *graph)
1251 : : : query_(query)
1252 : : , graph_(graph)
1253 : : , correlated_nested_queries_({{graph, nullptr}})
1254 : : { }
1255 : :
1256 : : /** Decorrelates `query_` by predicate push-up.
1257 : : * Returns `true` if all correlated predicates are decorrelated. */
1258 : : bool decorrelate() {
1259 : : /* Find the innermost, correlated graph and save the path in `graphs_`. */
1260 : : find_correlated_nested_graphs(query_.query_graph(), &query_);
1261 : :
1262 : : /* Push-up all equi-predicates (starting with the innermost query) until the first non-equi-predicate below a
1263 : : * grouping is reached. */
1264 : : while (correlated_nested_queries_.size() > 1) {
1265 : : auto last = correlated_nested_queries_.back();
1266 : : if (last.first->correlation_info_->non_equi() and last.first->grouping()) break;
1267 : : correlated_nested_queries_.pop_back();
1268 : : push_predicates(correlated_nested_queries_.back().first, last.second);
1269 : : }
1270 : :
1271 : : M_insist(not correlated_nested_queries_.empty());
1272 : :
1273 : : if (correlated_nested_queries_.size() > 1) {
1274 : : /* Initialize `needed_attrs_` for operator push-up. */
1275 : : needed_attrs_ = get_needed_attrs(graph_, query_);
1276 : : /* Provide all table names explicitly because the attribute name does not have to be unique anymore. */
1277 : : for (auto des : needed_attrs_) {
1278 : : /* Skip `FnApplicationExpr`s because they have no table name. */
1279 : : if (auto d = cast<const ast::Designator>(des); d and not d->has_explicit_table_name())
1280 : : const_cast<ast::Designator*>(d)->table_name.type = TK_IDENTIFIER;
1281 : : }
1282 : :
1283 : : /* Push-up all operators until the innermost non-equi-predicate is reached. */
1284 : : for (auto it = correlated_nested_queries_.begin(); it != std::prev(correlated_nested_queries_.end());) {
1285 : : auto &upper = *it;
1286 : : ++it;
1287 : : auto &lower = *it;
1288 : : pushOperators(lower.first, upper.first, lower.second);
1289 : : }
1290 : :
1291 : : /* Adapt all designators above the innermost non-equi-predicate. */
1292 : : auto *origin = &correlated_nested_queries_.back().first->group_by_;
1293 : : M_insist(not origin->empty());
1294 : : for (auto i = correlated_nested_queries_.size() - 1; i != 0; --i) {
1295 : : auto &upper = correlated_nested_queries_[i - 1];
1296 : : M_insist(not origin->empty());
1297 : : replace_designators(upper.first, origin);
1298 : : if (upper.first->grouping())
1299 : : origin = &upper.first->group_by_;
1300 : : }
1301 : : }
1302 : :
1303 : : auto iterator = correlated_nested_queries_.end(); // iterator to the innermost graph which is not completely decorrelated
1304 : : for (auto it = correlated_nested_queries_.begin(); it != correlated_nested_queries_.end(); ++it) {
1305 : : if (not (*it).first->correlation_info_->empty()) {
1306 : : /* In this graph not all correlated predicates are decorrelated. */
1307 : : iterator = it;
1308 : : }
1309 : : }
1310 : : if (iterator == correlated_nested_queries_.end()) {
1311 : : /* All correlated predicates are decorrelated. */
1312 : : return true;
1313 : : } else {
1314 : : /* Not all correlated predicates are decorrelated. Unset `decorrelated_`-flag of all queries until the
1315 : : * innermost not decorrelated predicate. */
1316 : : for (auto it = correlated_nested_queries_.begin(); it != iterator;)
1317 : : (*++it).second->decorrelated_ = false;
1318 : : return false;
1319 : : }
1320 : : }
1321 : :
1322 : : private:
1323 : : /** Adds all not yet decorrelated query graphs to `graphs_` starting with `graph`. Recurses into nested queries
1324 : : * that are still correlated. */
1325 : : void find_correlated_nested_graphs(QueryGraph *graph, Query *query) {
1326 : : correlated_nested_queries_.emplace_back(graph, query);
1327 : :
1328 : : bool found = false;
1329 : : for (auto ds : graph->sources()) {
1330 : : if (ds->decorrelated_) continue; // already decorrelated, nothing to be done
1331 : : M_insist(not found, "Multiple not yet decorrelated nested queries in one graph should not be possible.");
1332 : : found = true;
1333 : : auto q = as<Query>(ds);
1334 : : find_correlated_nested_graphs(q->query_graph(), q);
1335 : : }
1336 : : }
1337 : :
1338 : : /** Pushes all correlated predicates contained in `inner_graph` up into `outer_graph`. Therefore, the projection and
1339 : : * grouping of `inner_graph` are adapted and a join between `query` and all involved sources of each predicate is
1340 : : * added in `outer_graph` iff all sources are reachable. Otherwise, the correlated predicate has to be pushed
1341 : : * further up and, therefore, is added to `outer_graph.info_`. */
1342 : : void push_predicates(QueryGraph *outer_graph, Query *query) {
1343 : : QueryGraph *inner_graph = query->query_graph();
1344 : : M_insist(contains(outer_graph->sources(), query));
1345 : :
1346 : : const bool inner_has_projection = not inner_graph->projections().empty();
1347 : : auto grouping = inner_graph->grouping();
1348 : : auto equi = inner_graph->correlation_info_->getEqui();
1349 : : auto non_equi = inner_graph->correlation_info_->getNonEqui();
1350 : : inner_graph->correlation_info_->equi_.clear();
1351 : : inner_graph->correlation_info_->non_equi_.clear();
1352 : : bool is_equi = true;
1353 : : M_insist(non_equi.empty() or not grouping);
1354 : : for (auto it = equi.begin(); it != non_equi.end(); ++it) {
1355 : : if (it == equi.end()) {
1356 : : it = non_equi.begin(); // to iterate over equi and non_equi
1357 : : is_equi = false;
1358 : : if (it == non_equi.end()) break; // to skip empty non_equi
1359 : : }
1360 : : auto correlation_info = *it;
1361 : : /* Adapt projection and grouping. */
1362 : : std::unordered_map<const ast::Designator*, std::unique_ptr<ast::Designator>> old_to_new;
1363 : : for (auto e : correlation_info->uncorrelated_exprs) {
1364 : : const ast::Expr *e_new = e;
1365 : : if (grouping) { // ensure that `e` is maintained through grouping
1366 : : emplace_back_unique(inner_graph->group_by_, e); // the `inner_graph` must group by `e`
1367 : : /*----- Create name for projecting `e`. -----*/
1368 : : if (auto d = cast<const ast::Designator>(e)) {
1369 : : /* Create a new designator pointing to `d` (the grouping key) and add respective mapping. */
1370 : : Catalog &C = Catalog::Get();
1371 : : std::ostringstream oss;
1372 : : oss << *d;
1373 : : Position pos(C.pool("Decorrelation"));
1374 : : ast::Token attr(pos, C.pool(oss.str().c_str()), TK_IDENTIFIER);
1375 : : auto d_new = std::make_unique<ast::Designator>(attr, ast::Token(), attr, as<const PrimitiveType>(d->type())->as_scalar(), d);
1376 : : auto res = old_to_new.emplace(d, std::move(d_new));
1377 : : e_new = res.first->second.get();
1378 : : }
1379 : : }
1380 : : if (inner_has_projection)
1381 : : emplace_back_unique(
1382 : : inner_graph->projections_,
1383 : : std::pair<const ast::Expr*, const char*>(e_new, /* no alias */ nullptr)
1384 : : );
1385 : : }
1386 : : /* Update `correlation_info->uncorrelated_expr` and `correlation_info->clause` by replacing the table name
1387 : : * of all uncorrelated designators by the alias of `query` (if given) because it will be pushed up and
1388 : : * therefore the former sources are only reachable via `query`. */
1389 : : correlation_info->replace(old_to_new, query->alias(), inner_graph->correlation_info_->expansion_);
1390 : : /* Search needed sources. */
1391 : : std::vector<DataSource*> sources{query};
1392 : : auto all_reachable = true;
1393 : : for (auto s : correlation_info->sources) {
1394 : : if (auto src = findSource(outer_graph->sources(), s)) {
1395 : : emplace_back_unique(sources, src);
1396 : : } else {
1397 : : /* The source s is not reachable. */
1398 : : all_reachable = false;
1399 : : break;
1400 : : }
1401 : : }
1402 : : /* Add join or rather filter. */
1403 : : if (all_reachable) {
1404 : : correlation_info->finish_decorrelation();
1405 : : if (auto j = findJoin(outer_graph->joins(), sources)) {
1406 : : /* A join with the same data sources already exists so only update the predicate. */
1407 : : j->update_condition(cnf::CNF({correlation_info->clause}));
1408 : : } else {
1409 : : /* Create a new join for the data sources. */
1410 : : auto J = outer_graph->joins_.emplace_back(new Join(cnf::CNF({correlation_info->clause}), sources));
1411 : : for (auto ds : J->sources())
1412 : : ds->add_join(J);
1413 : : }
1414 : : delete correlation_info;
1415 : : } else {
1416 : : /* Add `i` to the upper correlation information. */
1417 : : if (is_equi)
1418 : : outer_graph->correlation_info_->equi_.push_back(correlation_info);
1419 : : else
1420 : : outer_graph->correlation_info_->non_equi_.push_back(correlation_info);
1421 : : }
1422 : : }
1423 : : query->decorrelated_ = true;
1424 : : }
1425 : :
1426 : : /** Pushes all operators (projection and grouping) contained in `lower` up above the sources of `upper`. Therefore,
1427 : : * the projection and grouping of `lower` are adapted and the sources of `upper` are added to the sources of
1428 : : * `lower`. Furthermore, the former join of `query` is transformed into its filter and a join between all
1429 : : * involved sources of each predicate is added in `lower` iff all sources are reachable.
1430 : : * Otherwise, the operators of the graph below has to be pushed further up and, therefore, the correlated
1431 : : * predicate is still contained in `lower.info_`. */
1432 : : void pushOperators(QueryGraph *lower, QueryGraph *upper, Query *query) {
1433 : : M_insist(query->query_graph() == lower);
1434 : : M_insist(contains(upper->sources(), query));
1435 : : auto lower_sources = lower->sources();
1436 : :
1437 : : /* Remove joins of `query` and add conditions as filter. */
1438 : : for (auto j : query->joins()) {
1439 : : query->update_filter(j->condition());
1440 : : upper->remove_join(j);
1441 : : for (auto ds : j->sources())
1442 : : ds->remove_join(j);
1443 : : delete j;
1444 : : }
1445 : : auto grouping = lower->grouping();
1446 : : /* Move all sources except `query` from `upper` to `lower` and adapt grouping of `lower`.
1447 : : * Change source id such that the id's of all sources of `lower` are sequential. */
1448 : : size_t num_sources = lower->sources().size();
1449 : : for (auto it = upper->sources_.begin(); it != upper->sources_.end(); ++it) {
1450 : : if (*it == query) continue;
1451 : : (*it)->id_ = num_sources++;
1452 : : lower->sources_.emplace_back(*it);
1453 : : if (grouping) {
1454 : : auto primary_key = get_primary_key(*it);
1455 : : M_insist(not primary_key.empty(), "can not push-up grouping above source without primary key");
1456 : : insert(lower->group_by_, primary_key);
1457 : : }
1458 : : }
1459 : : query->id_ = 0;
1460 : : upper->sources_ = {query};
1461 : : /* Move all joins of `upper` to `lower`. */
1462 : : lower->joins_.insert(lower->joins_.end(), upper->joins_.begin(), upper->joins_.end());
1463 : : upper->joins_.clear();
1464 : : /* Add all later needed attributes (of `graph_`) to grouping and projection of `lower`. */
1465 : : if (grouping)
1466 : : insert(lower->group_by_, needed_attrs_);
1467 : : if (not lower->projections().empty()) {
1468 : : for (auto attr : needed_attrs_)
1469 : : emplace_back_unique(lower->projections_, std::pair<const ast::Expr*, const char*>(attr, nullptr));
1470 : : }
1471 : : /* Add joins for correlated predicates iff possible. */
1472 : : auto equi = lower->correlation_info_->getEqui();
1473 : : auto non_equi = lower->correlation_info_->getNonEqui();
1474 : : lower->correlation_info_->equi_.clear();
1475 : : lower->correlation_info_->non_equi_.clear();
1476 : : bool is_equi = true;
1477 : : for (auto it = equi.begin(); it != non_equi.end(); ++it) {
1478 : : if (it == equi.end()) {
1479 : : it = non_equi.begin(); // to iterate over equi and non_equi
1480 : : is_equi = false;
1481 : : if (it == non_equi.end()) break; // to skip empty non_equi
1482 : : }
1483 : : auto i = *it;
1484 : : /* Search needed sources. Since `lower` could initially contain multiple sources which contribute to this
1485 : : * join we have to check the table names of uncorrelated designators, too. */
1486 : : std::vector<DataSource*> sources;
1487 : : auto all_reachable = true;
1488 : : for (auto e : i->uncorrelated_exprs) {
1489 : : if (auto d = cast<const ast::Designator>(e)) {
1490 : : if (auto src = findSource(lower->sources(), d->get_table_name())) {
1491 : : emplace_back_unique(sources, src);
1492 : : } else {
1493 : : /* The source d->get_table_name() is not reachable. */
1494 : : all_reachable = false;
1495 : : break;
1496 : : }
1497 : : } else {
1498 : : /* Aggregation can not be assigned to a specific source so add all initially available ones. */
1499 : : sources = lower_sources;
1500 : : break;
1501 : : }
1502 : : }
1503 : : for (auto s : i->sources) {
1504 : : if (auto src = findSource(lower->sources(), s)) {
1505 : : emplace_back_unique(sources, src);
1506 : : } else {
1507 : : /* The source s is not reachable. */
1508 : : all_reachable = false;
1509 : : break;
1510 : : }
1511 : : }
1512 : : /* Add join. */
1513 : : if (all_reachable) {
1514 : : i->finish_decorrelation();
1515 : : if (auto j = findJoin(lower->joins(), sources)) {
1516 : : /* A join with the same data sources already exists so only update the predicate. */
1517 : : j->update_condition(cnf::CNF({i->clause}));
1518 : : } else {
1519 : : /* Create a new join for the data sources. */
1520 : : auto J = lower->joins_.emplace_back(new Join(cnf::CNF({i->clause}), sources));
1521 : : for (auto ds : J->sources())
1522 : : ds->add_join(J);
1523 : : }
1524 : : delete i;
1525 : : } else {
1526 : : /* Add `i` again to the correlation information. */
1527 : : if (is_equi)
1528 : : lower->correlation_info_->equi_.push_back(i);
1529 : : else
1530 : : lower->correlation_info_->non_equi_.push_back(i);
1531 : : }
1532 : : }
1533 : : }
1534 : :
1535 : : /** Returns a `DataSource *` pointing to a data source with the same name as `name` iff it exists in `sources`,
1536 : : * and `nullptr` otherwise. */
1537 : : DataSource * findSource(const std::vector<DataSource*> &sources, const char *name) {
1538 : : for (auto s : sources) {
1539 : : if (auto q = cast<Query>(s); name == s->name() or (q and findSource(q->query_graph()->sources(), name)))
1540 : : return s;
1541 : : }
1542 : : return nullptr;
1543 : : }
1544 : :
1545 : : /** Returns a `Join *` pointing to a join with the same sources as `sources` iff it exists in `joins`, and
1546 : : * `nullptr` otherwise. */
1547 : : Join * findJoin(const std::vector<Join*> &joins, const std::vector<DataSource*> &sources) {
1548 : : std::unordered_set<DataSource*> cmp(sources.begin(), sources.end());
1549 : : for (auto j : joins) {
1550 : : if (cmp == std::unordered_set<DataSource*>(j->sources().begin(), j->sources().end()))
1551 : : return j;
1552 : : }
1553 : : return nullptr;
1554 : : }
1555 : :
1556 : : /** Replaces all designators in `graph` by new ones pointing to the respective designator in `origin`. */
1557 : : void replace_designators(QueryGraph *graph, const std::vector<const ast::Expr*> *origin) {
1558 : : M_insist(graph->sources().size() == 1);
1559 : : ReplaceDesignators RP;
1560 : : auto ds = *graph->sources().begin();
1561 : : auto filter_new = ds->filter();
1562 : : auto expansions = RP.replace(filter_new, origin);
1563 : : ds->filter_ = filter_new;
1564 : : graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1565 : : if (graph->grouping()) {
1566 : : expansions = RP.replace(graph->group_by_, origin);
1567 : : graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1568 : : expansions = RP.replace(graph->aggregates_, origin);
1569 : : graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1570 : : expansions = RP.replace(graph->projections_, &graph->group_by());
1571 : : graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1572 : : } else {
1573 : : expansions = RP.replace(graph->projections_, origin);
1574 : : graph->correlation_info_->expansion_.insert(graph->correlation_info_->expansion_.end(), expansions.begin(), expansions.end());
1575 : : }
1576 : : }
1577 : : };
1578 : : #endif
1579 : :
1580 : : /*======================================================================================================================
1581 : : * GraphBuilder
1582 : : *
1583 : : * An AST Visitor that constructs the query graph.
1584 : : *====================================================================================================================*/
1585 : :
1586 : :
1587 : 179 : void m::GraphBuilder::operator()(const ast::SelectStmt &stmt) {
1588 : 179 : Catalog &C = Catalog::Get();
1589 : :
1590 : : /*----- Collect all aggregates in the statement and save them in the graph. -----*/
1591 : 179 : graph_->aggregates_ = get_aggregates(stmt);
1592 : :
1593 : : /*----- Compute whether the query needs grouping. -----*/
1594 [ + + ]: 179 : needs_grouping_ = bool(stmt.group_by) or graph_->aggregates().size() > 0;
1595 : :
1596 : : /*----- Collect grouping keys of `stmt`. -----*/
1597 [ + + ]: 179 : if (stmt.group_by) {
1598 : 4 : auto &group_by = as<const ast::GroupByClause>(*stmt.group_by);
1599 [ + + ]: 8 : for (auto &[grp, alias] : group_by.group_by)
1600 : 8 : existing_grouping_keys_.emplace_back(*grp); // TODO correct?
1601 : 4 : }
1602 : :
1603 : : ///> holds all nested queries occurring in the `SelectClause` of `stmt`
1604 : 179 : std::vector<std::pair<std::reference_wrapper<const ast::QueryExpr>, ThreadSafePooledOptionalString>> nested_queries_in_select;
1605 : :
1606 : : /*----- Process FROM and create data sources. -----*/
1607 [ + + ]: 179 : if (stmt.from) {
1608 [ + - ]: 178 : auto &FROM = as<ast::FromClause>(*stmt.from);
1609 [ + + ]: 558 : for (auto &tbl : FROM.from) {
1610 [ + - ]: 380 : if (auto tok = std::get_if<ast::Token>(&tbl.source)) {
1611 : : /* Create a new base table. */
1612 [ + - + - ]: 380 : M_insist(tbl.has_table());
1613 [ + - + + : 380 : auto &base = graph_->add_source(tbl.alias ? tbl.alias.text : ThreadSafePooledOptionalString{}, tbl.table());
+ - + - +
- + + + +
# # # # ]
1614 [ + - - + ]: 380 : named_sources_.emplace(base.name(), base);
1615 [ # # ]: 380 : } else if (auto stmt = std::get_if<ast::Stmt*>(&tbl.source)) {
1616 [ # # # # ]: 0 : M_insist(tbl.alias.text.has_value(), "every nested statement requires an alias");
1617 [ # # ]: 0 : auto &select = as<ast::SelectStmt>(**stmt);
1618 : : /* Create a graph for the sub query. */
1619 [ # # ]: 0 : auto graph = QueryGraph::Build(select);
1620 [ # # # # ]: 0 : auto &Q = graph_->add_source(tbl.alias.text, std::move(graph));
1621 [ # # # # ]: 0 : M_insist(tbl.alias);
1622 [ # # ]: 0 : named_sources_.emplace(tbl.alias.text, Q);
1623 : 0 : } else {
1624 [ # # ]: 0 : M_unreachable("invalid variant");
1625 : : }
1626 : : }
1627 : 178 : }
1628 : :
1629 : : /* Do some computation before first nested query is decorrelated to ensure that `get_needed_attrs()` works as
1630 : : * intended: */
1631 : :
1632 : : /*----- Process GROUP BY and collect grouping keys. -----*/
1633 [ + + ]: 179 : if (stmt.group_by) {
1634 [ + - ]: 4 : auto &GROUP_BY = as<ast::GroupByClause>(*stmt.group_by);
1635 [ + + ]: 8 : for (auto &[grp, alias] : GROUP_BY.group_by)
1636 [ + - + - : 12 : graph_->group_by_.emplace_back(*grp, alias.text);
+ - ]
1637 : 4 : }
1638 : :
1639 : : /*----- Process SELECT and collect projections. -----*/
1640 [ + - ]: 179 : auto SELECT = as<ast::SelectClause>(stmt.select.get());
1641 [ + + ]: 1149 : for (auto &e : SELECT->expanded_select_all)
1642 [ + - ]: 970 : graph_->projections_.emplace_back(*e, ThreadSafePooledOptionalString{}); // expansions never contain nested queries;
1643 : : // directly add to projections
1644 : : /* Collect nested queries in SELECT clause. */
1645 [ + + ]: 194 : for (auto &s : SELECT->select) {
1646 [ + - + - ]: 30 : if (auto query = cast<const ast::QueryExpr>(s.first)) // found nested query
1647 [ # # ]: 0 : nested_queries_in_select.emplace_back(*query, s.second.text);
1648 : : else
1649 [ + - ]: 15 : graph_->projections_.emplace_back(*s.first, s.second.text); // add to projections
1650 : : }
1651 : :
1652 : : /*----- Process WHERE clause. -----*/
1653 [ + + ]: 179 : if (stmt.where) {
1654 : : /*----- Get WHERE clause and convert to CNF. -----*/
1655 [ + - ]: 78 : auto &WHERE = as<ast::WhereClause>(*stmt.where);
1656 [ + - ]: 78 : auto cnf_where = cnf::to_CNF(*WHERE.where);
1657 : :
1658 : : /*----- Analyze all CNF clauses of the WHERE clause. -----*/
1659 [ + + ]: 345 : for (auto &clause : cnf_where)
1660 [ + - ]: 267 : process_selection(clause);
1661 : :
1662 : : /*----- Introduce additional grouping keys. -----*/
1663 [ + - ]: 78 : for (auto e : additional_grouping_keys_) {
1664 [ # # ]: 0 : graph_->group_by_.emplace_back(e, ThreadSafePooledOptionalString{});
1665 [ # # ]: 0 : graph_->projections_.emplace_back(e, ThreadSafePooledOptionalString{});
1666 : : }
1667 : :
1668 [ + + ]: 1143 : for (auto &[clause, CI] : bound_clauses_) {
1669 [ + - + + ]: 267 : if (CI.is_constant()) {
1670 [ + + ]: 5 : for (auto &[_, src] : named_sources_)
1671 [ - + - + : 6 : src.get().update_filter(cnf::CNF{std::move(clause)}); // NOTE: kind of silly, but it works
- + - + ]
1672 [ + - + + ]: 267 : } else if (CI.is_selection()) {
1673 [ + - + - ]: 10 : auto it = named_sources_.find(*CI.data_sources.begin());
1674 [ + - ]: 5 : M_insist(it != named_sources_.end(), "data source with that name was not found");
1675 [ - + - + : 10 : it->second.get().update_filter(cnf::CNF{std::move(clause)});
- + - + ]
1676 : 5 : } else {
1677 : 260 : Join::sources_t sources;
1678 [ + + + - ]: 781 : for (auto src_name : CI.data_sources) {
1679 [ + - ]: 521 : auto it = named_sources_.find(src_name);
1680 [ + - ]: 521 : M_insist(it != named_sources_.end(), "data source with that name was not found");
1681 [ + - ]: 521 : sources.emplace_back(it->second);
1682 : 521 : }
1683 [ + - + - : 520 : auto J = std::make_unique<Join>(cnf::CNF{std::move(clause)}, std::move(sources));
+ - + - ]
1684 [ + - ]: 260 : auto &ref = *graph_->joins_.emplace_back(std::move(J)); // add join to query graph
1685 [ + - + + ]: 781 : for (auto ds : ref.sources())
1686 [ + - ]: 521 : ds.get().add_join(ref); // add join to all data sources
1687 : 260 : }
1688 : : }
1689 : 78 : }
1690 : :
1691 : : #if 0
1692 : : /* Dissect CNF into joins, filters and nested queries. Performs unnesting immediately during graph
1693 : : * construction. Decorrelation is performed afterwards where possible. */
1694 : : /* TODO iterate over clauses in the order joins → filters/nested queries (equi) → nested queries (non-equi) */
1695 : : for (auto &clause : cnf_where) {
1696 : : const auto nested_queries = get_nested_queries(clause); // collect all nested queries of `clause`
1697 : : const auto tables = get_tables(clause);
1698 : : if (nested_queries.empty()) {
1699 : : /* Compute correlation information of this clause analogously. */
1700 : : if (tables.empty()) {
1701 : : /* This clause is a filter and constant. It applies to all data sources. Therefore, add it as a
1702 : : * filter condition to *each* source. */
1703 : : for (auto [_, source] : named_sources_)
1704 : : ; // TODO XXX
1705 : : source->update_filter(graph_->correlation_info_->compute(cnf::CNF{clause}));
1706 : : } else if (tables.size() == 1) {
1707 : : /* This clause is a filter condition on its single data source. */
1708 : : auto t = *begin(tables);
1709 : : auto ds = named_sources_.at(t);
1710 : : // TODO compute correlation information of `clause`
1711 : : // XXX
1712 : : // ds->update_filter(graph_->correlation_info_->compute(cnf::CNF({clause}))); //XXX
1713 : : } else {
1714 : : /* This clause is a join condition between two or more data sources. Create a new join in the query
1715 : : * graph. */
1716 : : Join::sources_t sources;
1717 : : for (auto t : tables)
1718 : : sources.emplace_back(named_sources_.at(t));
1719 : : // XXX
1720 : : // auto J = graph_->joins_.emplace_back(new Join(graph_->correlation_info_->compute(cnf::CNF({clause})), sources));
1721 : : // for (auto ds : J->sources())
1722 : : // ds->add_join(J);
1723 : : }
1724 : : } else {
1725 : : M_insist(nested_queries.size() == 1, "Multiple nested queries in one clause are not supported.");
1726 : : auto &nested_query = nested_queries.front().get();
1727 : : auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query); // get single, nested query
1728 : : auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1729 : : M_insist(not nested_select_clause.select_all, "* can not yet be used in nested queries.");
1730 : : M_insist(nested_select_clause.select.size() == 1, "only a single projection allowed");
1731 : :
1732 : : /* Replace the alias of the single projection by `$res`. */
1733 : : auto &single_projection = nested_select_clause.select.front();
1734 : : single_projection.second.text = C.pool("$res");
1735 : :
1736 : : /* Create a graph for the nested query. Unnest the query by adding it as a source to the outer graph */
1737 : : auto nested_query_name = nested_query.alias();
1738 : : auto &Q = graph_->add_source(nested_query_name, QueryGraph::Build(nested_stmt));
1739 : :
1740 : : /* Create a dependent join between the nested query and all tables in the clause. The dependent join
1741 : : * will later be resolved through decorrelation. */
1742 : : if (tables.empty()) {
1743 : : /* This clause is predicate with nested query `Q` and *no* tables of outer. It behaves like a
1744 : : * regular selection on outer. Since it contains the nested query `Q`, we have to use a join to an
1745 : : * arbitrary data source. TODO be clever when selecting the relation to join to. */
1746 : : M_insist(not named_sources_.empty(), "cannot join nested query to any source");
1747 : : /* TODO if Q is correlated use the referenced table */
1748 : : auto &any_data_source = *named_sources_.begin(); // pick *any* data source
1749 : : auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), {Q, any_data_source.second}));
1750 : : Q.add_join(*J); // add join to Q
1751 : : any_data_source.second.get().add_join(*J); // add join to the chosen data source of outer
1752 : : } else {
1753 : : /* This clause is a join condition. */
1754 : : Join::sources_t sources{Q};
1755 : : /* Create a join between outer's tables in `clause` and the nested query (as data source) `Q`. */
1756 : : for (auto t : tables)
1757 : : sources.emplace_back(named_sources_.at(t));
1758 : : auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), sources));
1759 : : for (auto ds : J->sources())
1760 : : ds.get().add_join(*J);
1761 : : }
1762 : :
1763 : : named_sources_.emplace(nested_query_name, Q); // XXX why?
1764 : :
1765 : : /* Decorrelate the nested query iff it is correlated. */
1766 : : if (Q.is_correlated())
1767 : : /* move the correlated predicate (i.e. the one with a free variable) to outer */
1768 : : decorrelate(Q);
1769 : : }
1770 : : }
1771 : : #endif
1772 : :
1773 : : /* Implement HAVING as a regular selection filter on a sub query. */
1774 : 179 : cnf::CNF cnf_having;
1775 [ + + ]: 179 : if (stmt.having) {
1776 [ + - ]: 2 : auto having_clause = as<ast::HavingClause>(stmt.having.get());
1777 [ + - ]: 2 : cnf_having = cnf::to_CNF(*having_clause->having);
1778 [ + - ]: 2 : auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1779 [ + - ]: 2 : auto &sub = graph_->add_source(ThreadSafePooledOptionalString{}, std::move(sub_graph));
1780 [ + - + - ]: 2 : sub.update_filter(cnf_having);
1781 [ + - + - ]: 2 : named_sources_.emplace(C.pool("HAVING"), sub);
1782 : : /* Reset former computation of projection and move it after the HAVING. */
1783 [ + - ]: 2 : std::swap(graph_->projections_, sub.query_graph().projections_);
1784 : : /* Update `decorrelated_`-flag. */
1785 [ + - + - ]: 2 : if (sub.is_correlated())
1786 : 0 : sub.decorrelated_ = false;
1787 : 2 : }
1788 : :
1789 : : /* Dissect CNF into filters and nested queries. Decorrelate if possible. */
1790 : : /* TODO iterate over clauses in the order filters/nested queries (equi) < nested queries (non-equi) */
1791 [ + + ]: 181 : for (auto &clause : cnf_having) {
1792 [ + - ]: 2 : ClauseInfo CI(clause);
1793 : 2 : auto &nested_queries = CI.nested_queries;
1794 [ + - ]: 2 : if (nested_queries.empty()) {
1795 : : // XXX
1796 : : // named_sources_.at("HAVING")->update_filter(graph_->correlation_info_->compute(cnf::CNF({clause})));
1797 : 2 : } else {
1798 [ # # ]: 0 : M_insist(nested_queries.size() == 1, "Multiple nested queries in one clause are not yet supported.");
1799 : 0 : auto &nested_query = nested_queries.front().expr;
1800 [ # # ]: 0 : auto &nested_stmt = as<const ast::SelectStmt>(*nested_query.query);
1801 [ # # ]: 0 : auto &nested_select_clause = as<ast::SelectClause>(*nested_stmt.select);
1802 [ # # # # ]: 0 : M_insist(not nested_select_clause.select_all, "* can not yet be used in nested queries.");
1803 [ # # ]: 0 : M_insist(nested_select_clause.select.size() == 1);
1804 : :
1805 : : /* Replace the alias of the expression by `$res`. */
1806 [ # # # # : 0 : nested_select_clause.select.front().second.text = C.pool("$res");
# # ]
1807 : :
1808 : : /* Create a graph for the nested query. Unnest the query by adding it as a source to the outer graph */
1809 [ # # ]: 0 : auto &q_name = nested_query.alias();
1810 [ # # # # : 0 : auto &Q = graph_->add_source(q_name, QueryGraph::Build(nested_stmt));
# # ]
1811 [ # # ]: 0 : named_sources_.emplace(q_name, Q);
1812 : :
1813 : : /* Create a join between the nested query and the HAVING. */
1814 [ # # # # : 0 : auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF({clause}), {Q, named_sources_.at(C.pool("HAVING"))}));
# # # # #
# # # # #
# # # # ]
1815 [ # # # # ]: 0 : for (auto ds : J->sources())
1816 [ # # ]: 0 : ds.get().add_join(*J);
1817 : : }
1818 : 2 : }
1819 : :
1820 : : /* To implement nested queries in SELECT clause when the query also has grouping, create a subquery (similar to
1821 : : * HAVING) if not already done. */
1822 [ - + # # : 179 : if (not nested_queries_in_select.empty() and not stmt.having and
# # ]
1823 [ # # ]: 0 : (not graph_->group_by_.empty() or not graph_->aggregates_.empty()))
1824 : : {
1825 [ # # ]: 0 : auto sub_graph = std::exchange(graph_, std::make_unique<QueryGraph>());
1826 [ # # ]: 0 : auto &sub = graph_->add_source(ThreadSafePooledOptionalString{}, std::move(sub_graph)); // anonymous source for nested query
1827 [ # # # # ]: 0 : named_sources_.emplace(C.pool("SELECT"), sub);
1828 : : /* Reset former computation of projection and move it after the SELECT. */
1829 [ # # ]: 0 : std::swap(graph_->projections_, sub.query_graph().projections_);
1830 : : /* Update `decorrelated_`-flag. */
1831 [ # # # # ]: 0 : if (sub.is_correlated())
1832 : 0 : sub.decorrelated_ = false;
1833 : 0 : }
1834 : :
1835 : : /* Add nested queries in SELECT. */
1836 : : /* TODO iterate over expressions in the order nested queries (equi) < nested queries (non-equi) */
1837 [ - + # # ]: 179 : for (auto [_query, name] : nested_queries_in_select) {
1838 : 0 : auto &query = _query.get();
1839 : :
1840 [ # # ]: 0 : auto &select = as<ast::SelectStmt>(*query.query);
1841 [ # # ]: 0 : auto &select_clause = as<ast::SelectClause>(*select.select);
1842 [ # # # # ]: 0 : M_insist(not select_clause.select_all, "* can not yet be used in nested queries.");
1843 [ # # ]: 0 : M_insist(select_clause.select.size() == 1);
1844 : : /* Replace the alias of the expression by `$res`. */
1845 [ # # # # : 0 : select_clause.select.front().second.text = C.pool("$res");
# # ]
1846 : : /* Create a sub graph for the nested query. */
1847 [ # # ]: 0 : auto sub = QueryGraph::Build(select);
1848 [ # # ]: 0 : auto &q_name = query.alias();
1849 [ # # # # ]: 0 : auto &Q = graph_->add_source(q_name, std::move(sub));
1850 : : /* Create a cartesian product between the nested query and an arbitrary source iff one exists. */
1851 [ # # ]: 0 : if (not named_sources_.empty()) {
1852 : : /* TODO if Q is correlated use the referenced table */
1853 : : DataSource *source;
1854 : : try {
1855 [ # # # # ]: 0 : source = &named_sources_.at(C.pool("SELECT")).get();
1856 [ # # ]: 0 : } catch (std::out_of_range&) { try {
1857 [ # # # # ]: 0 : source = &named_sources_.at(C.pool("HAVING")).get();
1858 [ # # ]: 0 : } catch (std::out_of_range&) {
1859 : 0 : source = &named_sources_.begin()->second.get(); // any source
1860 [ # # # # : 0 : } }
# # ]
1861 [ # # # # : 0 : auto &J = graph_->joins_.emplace_back(new Join(cnf::CNF(), {Q, *source}));
# # # # #
# ]
1862 [ # # ]: 0 : Q.add_join(*J);
1863 [ # # ]: 0 : source->add_join(*J);
1864 : 0 : }
1865 [ # # ]: 0 : named_sources_.emplace(q_name, Q);
1866 [ # # # # ]: 0 : graph_->projections_.emplace_back(query, name);
1867 : 0 : }
1868 : :
1869 : : /* Add order by. */
1870 [ + + ]: 179 : if (stmt.order_by) {
1871 [ + - ]: 4 : auto ORDER_BY = as<ast::OrderByClause>(stmt.order_by.get());
1872 [ + + ]: 9 : for (auto &o : ORDER_BY->order_by)
1873 [ + - ]: 5 : graph_->order_by_.emplace_back(*o.first, o.second);
1874 : 4 : }
1875 : :
1876 : : /* Add limit. */
1877 [ + + ]: 179 : if (stmt.limit) {
1878 [ + - ]: 2 : auto LIMIT = as<ast::LimitClause>(stmt.limit.get());
1879 : 2 : errno = 0;
1880 [ + - ]: 2 : graph_->limit_.limit = strtoull(*(LIMIT->limit.text), nullptr, 0);
1881 [ + - ]: 2 : M_insist(errno == 0);
1882 [ + - + + ]: 2 : if (LIMIT->offset) {
1883 [ + - ]: 1 : graph_->limit_.offset = strtoull(*(LIMIT->offset.text), nullptr, 0);
1884 [ + - ]: 1 : M_insist(errno == 0);
1885 : 1 : }
1886 : 2 : }
1887 : 179 : }
1888 : :
1889 : :
1890 : : /*======================================================================================================================
1891 : : * QueryGraph
1892 : : *====================================================================================================================*/
1893 : :
1894 : 368 : QueryGraph::QueryGraph() { }
1895 : :
1896 : 184 : QueryGraph::~QueryGraph() { }
1897 : :
1898 : 179 : std::unique_ptr<QueryGraph> QueryGraph::Build(const ast::Stmt &stmt)
1899 : : {
1900 : 179 : GraphBuilder builder;
1901 [ + - ]: 179 : builder(stmt);
1902 [ + - ]: 179 : return builder.get();
1903 : 179 : }
1904 : :
1905 : 2 : bool QueryGraph::is_correlated() const {
1906 : : // if (not correlation_info2_->is_correlated()) return true;
1907 : : // if (not correlation_info_->empty()) return true;
1908 : : // for (auto &ds : sources_) {
1909 : : // if (ds->is_correlated()) // recurse into data sources
1910 : : // return true;
1911 : : // }
1912 : 2 : return false;
1913 : : }
1914 : :
1915 : 141 : void QueryGraph::compute_adjacency_matrix() const
1916 : : {
1917 : 141 : adjacency_matrix_ = std::make_unique<AdjacencyMatrix>(num_sources());
1918 : :
1919 : : /* Iterate over all joins in the query graph. */
1920 [ + + ]: 373 : for (auto &join : joins()) {
1921 [ + + ]: 233 : if (join->sources().size() != 2)
1922 [ + - ]: 1 : throw std::invalid_argument("building adjacency matrix for non-binary join");
1923 : : /* Take both join inputs and set the appropriate bit in the adjacency matrix. */
1924 : 232 : auto i = join->sources()[0].get().id(); // first join input
1925 : 232 : auto j = join->sources()[1].get().id(); // second join input
1926 : 232 : adjacency_matrix_->at(i, j) = adjacency_matrix_->at(j, i) = true;
1927 : : }
1928 : 140 : }
1929 : :
1930 : 0 : void QueryGraph::dot(std::ostream &out) const
1931 : : {
1932 : 0 : out << "graph query_graph\n{\n"
1933 : 0 : << " forcelabels=true;\n"
1934 : 0 : << " overlap=false;\n"
1935 : 0 : << " labeljust=\"l\";\n"
1936 : 0 : << " graph [compound=true];\n"
1937 : 0 : << " graph [fontname = \"DejaVu Sans\"];\n"
1938 : 0 : << " node [fontname = \"DejaVu Sans\"];\n"
1939 : 0 : << " edge [fontname = \"DejaVu Sans\"];\n";
1940 : :
1941 : 0 : dot_recursive(out);
1942 : :
1943 : 0 : out << '}' << std::endl;
1944 : 0 : }
1945 : :
1946 : 0 : void QueryGraph::dot_recursive(std::ostream &out) const
1947 : : {
1948 : : #define q(X) '"' << X << '"' // quote
1949 : : #define id(X) q(std::hex << &X << std::dec) // convert virtual address to identifier
1950 [ # # ]: 0 : for (auto &ds : sources()) {
1951 [ # # ]: 0 : if (auto q = cast<Query>(ds.get()))
1952 : 0 : q->query_graph().dot_recursive(out);
1953 : : }
1954 : :
1955 : 0 : out << "\n subgraph cluster_" << this << " {\n";
1956 : :
1957 [ # # ]: 0 : for (auto &ds : sources()) {
1958 : 0 : out << " " << id(*ds) << " [label=<";
1959 [ # # # # ]: 0 : out << "<B>" << ds->name() << "</B>";
1960 [ # # ]: 0 : if (ds->filter().size())
1961 : 0 : out << "<BR/><FONT COLOR=\"0.0 0.0 0.25\" POINT-SIZE=\"10\">"
1962 [ # # # # ]: 0 : << html_escape(to_string(ds->filter()))
1963 [ # # ]: 0 : << "</FONT>";
1964 : 0 : out << ">,style=filled,fillcolor=\"0.0 0.0 0.8\"];\n";
1965 [ # # ]: 0 : if (auto q = cast<Query>(ds.get()))
1966 : 0 : out << " " << id(*ds) << " -- \"cluster_" << &q->query_graph() << "\";\n";
1967 : : }
1968 : :
1969 [ # # ]: 0 : for (auto &j : joins()) {
1970 [ # # # # : 0 : out << " " << id(*j) << " [label=<" << html_escape(to_string(j->condition())) << ">,style=filled,fillcolor=\"0.0 0.0 0.95\"];\n";
# # ]
1971 [ # # ]: 0 : for (auto ds : j->sources())
1972 : 0 : out << " " << id(*j) << " -- " << id(ds.get()) << ";\n";
1973 : : }
1974 : :
1975 : 0 : out << " label=<"
1976 : 0 : << "<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\">\n";
1977 : :
1978 : : /* Limit */
1979 [ # # # # ]: 0 : if (limit_.limit != 0 or limit_.offset != 0) {
1980 : 0 : out << " <TR><TD ALIGN=\"LEFT\">\n"
1981 : 0 : << " <B>λ</B><FONT POINT-SIZE=\"9\">"
1982 : 0 : << limit_.limit << ", " << limit_.offset
1983 : 0 : << "</FONT>\n"
1984 : 0 : << " </TD></TR>\n";
1985 : 0 : }
1986 : :
1987 : : /* Order by */
1988 [ # # ]: 0 : if (order_by_.size()) {
1989 : 0 : out << " <TR><TD ALIGN=\"LEFT\">\n"
1990 : 0 : << " <B>ω</B><FONT POINT-SIZE=\"9\">";
1991 [ # # ]: 0 : for (auto it = order_by_.begin(), end = order_by_.end(); it != end; ++it) {
1992 [ # # ]: 0 : if (it != order_by_.begin()) out << ", ";
1993 [ # # # # ]: 0 : out << html_escape(to_string(it->first.get()));
1994 : 0 : out << ' ' << (it->second ? "ASC" : "DESC");
1995 : 0 : }
1996 : 0 : out << "</FONT>\n"
1997 : 0 : << " </TD></TR>\n";
1998 : 0 : }
1999 : :
2000 : : /* Projections */
2001 [ # # ]: 0 : if (not projections_.empty()) {
2002 : 0 : out << " <TR><TD ALIGN=\"LEFT\">\n"
2003 : 0 : << " <B>π</B><FONT POINT-SIZE=\"9\">";
2004 [ # # ]: 0 : for (auto it = projections_.begin(), end = projections_.end(); it != end; ++it) {
2005 [ # # ]: 0 : if (it != projections_.begin())
2006 : 0 : out << ", ";
2007 [ # # # # ]: 0 : out << html_escape(to_string(it->first.get()));
2008 [ # # ]: 0 : if (it->second.has_value())
2009 [ # # # # : 0 : out << " AS " << html_escape(*it->second);
# # ]
2010 : 0 : }
2011 : 0 : out << "</FONT>\n"
2012 : 0 : << " </TD></TR>\n";
2013 : 0 : }
2014 : :
2015 : : /* Group by and aggregates */
2016 [ # # # # ]: 0 : if (not group_by_.empty() or not aggregates_.empty()) {
2017 : 0 : out << " <TR><TD ALIGN=\"LEFT\">\n"
2018 : 0 : << " <B>γ</B><FONT POINT-SIZE=\"9\">";
2019 [ # # ]: 0 : for (auto it = group_by_.begin(), end = group_by_.end(); it != end; ++it) {
2020 [ # # ]: 0 : if (it != group_by_.begin()) out << ", ";
2021 [ # # # # ]: 0 : out << html_escape(to_string(it->first.get()));
2022 [ # # ]: 0 : if (it->second.has_value())
2023 [ # # # # : 0 : out << " AS " << html_escape(*it->second);
# # ]
2024 : 0 : }
2025 [ # # # # ]: 0 : if (not group_by_.empty() and not aggregates_.empty())
2026 : 0 : out << ", ";
2027 [ # # ]: 0 : for (auto it = aggregates_.begin(), end = aggregates_.end(); it != end; ++it) {
2028 [ # # ]: 0 : if (it != aggregates_.begin()) out << ", ";
2029 [ # # # # ]: 0 : out << html_escape(to_string(it->get()));
2030 : 0 : }
2031 : 0 : out << "</FONT>\n"
2032 : 0 : << " </TD></TR>\n";
2033 : 0 : }
2034 : :
2035 : 0 : out << " </TABLE>\n"
2036 : 0 : << " >;\n"
2037 : 0 : << " }\n";
2038 : : #undef id
2039 : : #undef q
2040 : 0 : }
2041 : :
2042 : 0 : void QueryGraph::sql(std::ostream &out) const
2043 : : {
2044 : 0 : QueryGraph2SQL t(out);
2045 [ # # ]: 0 : t(*this);
2046 [ # # # # ]: 0 : out << ';' << std::endl;
2047 : 0 : }
2048 : :
2049 : : M_LCOV_EXCL_START
2050 : : void QueryGraph::dump(std::ostream &out) const
2051 : : {
2052 : : out << "QueryGraph {\n sources:";
2053 : :
2054 : : /*----- Print sources. -------------------------------------------------------------------------------------------*/
2055 : : for (auto &src : sources()) {
2056 : : out << "\n ";
2057 : : if (auto q = cast<Query>(src.get())) {
2058 : : out << "(...)";
2059 : : } else {
2060 : : auto bt = as<BaseTable>(*src);
2061 : : out << bt.table().name();
2062 : : }
2063 : : if (src->alias().has_value())
2064 : : out << " AS " << src->alias();
2065 : : if (not src->filter().empty())
2066 : : out << " WHERE " << src->filter();
2067 : : }
2068 : :
2069 : : /*----- Print joins. ---------------------------------------------------------------------------------------------*/
2070 : : if (joins().empty()) {
2071 : : out << "\n no joins";
2072 : : } else {
2073 : : out << "\n joins:";
2074 : : for (auto &j : joins()) {
2075 : : out << "\n {";
2076 : : auto &srcs = j->sources();
2077 : : for (auto it = srcs.begin(), end = srcs.end(); it != end; ++it) {
2078 : : if (it != srcs.begin()) out << ' ';
2079 : : if (it->get().alias().has_value())
2080 : : out << it->get().alias();
2081 : : else if (auto bt = cast<const BaseTable>(&it->get()))
2082 : : out << bt->name();
2083 : : else
2084 : : out << "(...)";
2085 : : }
2086 : : out << "} " << j->condition();
2087 : : }
2088 : : }
2089 : :
2090 : : /*----- Print grouping and aggregation information. -------------------------------------------------------------*/
2091 : : if (group_by().empty() and aggregates().empty()) {
2092 : : out << "\n no grouping";
2093 : : } else {
2094 : : if (not group_by().empty()) {
2095 : : out << "\n group by: ";
2096 : : for (auto it = group_by().begin(), end = group_by().end(); it != end; ++it) {
2097 : : if (it != group_by().begin())
2098 : : out << ", ";
2099 : : out << it->first.get();
2100 : : if (it->second.has_value())
2101 : : out << " AS " << it->second;
2102 : : }
2103 : : }
2104 : : if (not aggregates().empty()) {
2105 : : out << "\n aggregates: ";
2106 : : for (auto it = aggregates().begin(), end = aggregates().end(); it != end; ++it) {
2107 : : if (it != aggregates().begin())
2108 : : out << ", ";
2109 : : out << it->get();
2110 : : }
2111 : : }
2112 : : }
2113 : :
2114 : : /*----- Print ordering information. ------------------------------------------------------------------------------*/
2115 : : if (order_by().empty()) {
2116 : : out << "\n no order";
2117 : : } else {
2118 : : out << "\n order by: ";
2119 : : for (auto it = order_by().begin(), end = order_by().end(); it != end; ++it) {
2120 : : if (it != order_by().begin())
2121 : : out << ", ";
2122 : : out << it->first.get() << ' ' << (it->second ? "ASC" : "DESC");
2123 : : }
2124 : : }
2125 : :
2126 : : /*----- Print projections. ---------------------------------------------------------------------------------------*/
2127 : : out << "\n projections: ";
2128 : : for (auto [expr, alias] : projections()) {
2129 : : if (alias.has_value()) {
2130 : : out << "\n AS " << alias;
2131 : : ast::ASTDumper P(out, 3);
2132 : : P(expr.get());
2133 : : } else {
2134 : : ast::ASTDumper P(out, 2);
2135 : : P(expr.get());
2136 : : }
2137 : : }
2138 : :
2139 : : out << "\n}" << std::endl;
2140 : : }
2141 : : void QueryGraph::dump() const { dump(std::cerr); }
2142 : : M_LCOV_EXCL_STOP
|