Skip to content

Commit 2b6dc1f

Browse files
committed
Small refactoring for better readability based on disscussions with Andrey and Pavel
1 parent 0e51f6d commit 2b6dc1f

File tree

6 files changed

+33
-38
lines changed

6 files changed

+33
-38
lines changed

axiom/optimizer/Optimization.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -477,9 +477,9 @@ bool isIndexColocated(
477477
const IndexInfo& info,
478478
const ExprVector& lookupValues,
479479
const RelationOpPtr& input) {
480-
const auto& distribution = info.index->distribution;
480+
const auto& desired = info.index->distribution;
481481
if (const auto needsShuffle =
482-
input->distribution().maybeNeedsShuffle(distribution)) {
482+
input->distribution().maybeNeedsShuffle(desired)) {
483483
return !*needsShuffle;
484484
}
485485
// TODO: Future partition type should be accounted.
@@ -490,8 +490,7 @@ bool isIndexColocated(
490490
for (size_t i = 0; i < input->distribution().partition.size(); ++i) {
491491
auto nthKey = position(lookupValues, *input->distribution().partition[i]);
492492
if (nthKey == kNotFound ||
493-
info.schemaColumn(info.lookupKeys[nthKey]) !=
494-
distribution.partition[i]) {
493+
info.schemaColumn(info.lookupKeys[nthKey]) != desired.partition[i]) {
495494
return false;
496495
}
497496
}
@@ -600,7 +599,7 @@ PlanObjectSet availableColumns(BaseTableCP baseTable, ColumnGroupCP index) {
600599
}
601600

602601
bool isBroadcastableSize(PlanP build, PlanState& /*state*/) {
603-
return build->cost.fanout < 100'000;
602+
return build->cost.resultCardinality() < 100'000;
604603
}
605604

606605
// The 'other' side gets shuffled to align with 'input'. If 'input' is not
@@ -1303,7 +1302,7 @@ void Optimization::addJoin(
13031302
if (toTry.size() == 2 && candidate.tables.size() == 1) {
13041303
VELOX_DCHECK(!options_.syntacticJoinOrder);
13051304
if (toTry[0].isWorse(toTry[1])) {
1306-
std::swap(toTry[0], toTry[1]);
1305+
toTry[0] = std::move(toTry[1]);
13071306
toTry.pop_back();
13081307
} else if (toTry[1].isWorse(toTry[0])) {
13091308
toTry.pop_back();
@@ -1548,9 +1547,11 @@ PlanP unionPlan(
15481547
for (auto i = 1; i < states.size(); ++i) {
15491548
const auto& otherCost = states[i].cost;
15501549
fullyImported.intersect(inputPlans[i]->fullyImported);
1551-
firstState.cost.add(otherCost);
1552-
// The input cardinality is not additive, the fanout and other metrics are.
1553-
firstState.cost.inputCardinality -= otherCost.inputCardinality;
1550+
// The input cardinality is not additive.
1551+
firstState.cost.setupCost += otherCost.setupCost;
1552+
firstState.cost.unitCost += otherCost.unitCost;
1553+
firstState.cost.totalBytes += otherCost.totalBytes;
1554+
firstState.cost.transferBytes += otherCost.transferBytes;
15541555
}
15551556
if (distinct) {
15561557
firstState.addCost(*distinct);

axiom/optimizer/Plan.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ std::string Plan::toString(bool detail) const {
114114
}
115115

116116
void PlanState::addCost(RelationOp& op) {
117-
cost.unitCost += cost.inputCardinality * cost.fanout * op.cost().unitCost;
117+
VELOX_DCHECK_EQ(cost.inputCardinality, 1);
118118
cost.setupCost += op.cost().setupCost;
119+
cost.unitCost += op.cost().unitCost * cost.fanout;
119120
cost.fanout *= op.cost().fanout;
120121
cost.totalBytes += op.cost().totalBytes;
121122
cost.transferBytes += op.cost().transferBytes;
@@ -339,6 +340,7 @@ PlanP PlanSet::addPlan(RelationOpPtr plan, PlanState& state) {
339340
}
340341

341342
PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
343+
// TODO: Consider desired order here too.
342344
PlanP best = nullptr;
343345
PlanP match = nullptr;
344346
float bestCost = -1;
@@ -347,8 +349,7 @@ PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
347349
const bool single = isSingleWorker();
348350

349351
for (const auto& plan : plans) {
350-
const float cost =
351-
plan->cost.fanout * plan->cost.unitCost + plan->cost.setupCost;
352+
const float cost = plan->cost.cost();
352353

353354
auto update = [&](PlanP& current, float& currentCost) {
354355
if (!current || cost < currentCost) {
@@ -370,8 +371,9 @@ PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
370371
}
371372

372373
if (match) {
373-
const float shuffle = shuffleCost(best->op->columns()) * best->cost.fanout;
374-
if (matchCost <= bestCost + shuffle) {
374+
const float bestCostWithShuffle =
375+
best->cost.cost(shuffleCost(best->op->columns()));
376+
if (matchCost <= bestCostWithShuffle) {
375377
return match;
376378
}
377379
}

axiom/optimizer/RelationOp.cpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,6 @@
2424
#include "velox/expression/ScopedVarSetter.h"
2525

2626
namespace facebook::axiom::optimizer {
27-
28-
void Cost::add(const Cost& other) {
29-
inputCardinality += other.inputCardinality;
30-
fanout += other.fanout;
31-
setupCost += other.setupCost;
32-
}
33-
3427
namespace {
3528

3629
const auto& relTypeNames() {
@@ -216,8 +209,7 @@ void RelationOp::printCost(bool detail, std::stringstream& out) const {
216209
auto ctx = queryCtx();
217210
if (ctx && ctx->contextPlan()) {
218211
auto plan = ctx->contextPlan();
219-
auto totalCost = plan->cost.unitCost + plan->cost.setupCost;
220-
auto pct = 100 * cost_.inputCardinality * cost_.unitCost / totalCost;
212+
auto pct = 100 * cost_.cost() / plan->cost.cost();
221213
out << " " << std::fixed << std::setprecision(2) << pct << "% ";
222214
}
223215
if (detail) {
@@ -353,7 +345,7 @@ Join::Join(
353345
cost_.inputCardinality = inputCardinality();
354346
cost_.fanout = fanout;
355347

356-
const float buildSize = right->cost().inputCardinality;
348+
const float buildSize = right->resultCardinality();
357349
const auto numRightColumns =
358350
static_cast<float>(right->input()->columns().size());
359351
auto rowCost = numRightColumns * Costs::kHashExtractColumnCost;
@@ -535,7 +527,7 @@ Aggregation::Aggregation(
535527
// being unique after n values is 1 - (1/d)^n.
536528
auto nOut = cardinality -
537529
cardinality *
538-
std::pow(1.0F - (1.0F / cardinality), input_->resultCardinality());
530+
std::pow(1.0F - (1.0F / cardinality), cost_.inputCardinality);
539531

540532
cost_.fanout = nOut / cost_.inputCardinality;
541533
const auto numGrouppingKeys = static_cast<float>(groupingKeys.size());
@@ -802,8 +794,7 @@ UnionAll::UnionAll(RelationOpPtrVector inputsVector)
802794
: RelationOp{RelType::kUnionAll, nullptr, Distribution{}, inputsVector[0]->columns()},
803795
inputs{std::move(inputsVector)} {
804796
for (auto& input : inputs) {
805-
cost_.inputCardinality +=
806-
input->cost().inputCardinality * input->cost().fanout;
797+
cost_.inputCardinality += input->resultCardinality();
807798
}
808799

809800
cost_.fanout = 1;

axiom/optimizer/RelationOp.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,21 @@ struct Cost {
5454
// or index access.
5555
float setupCost{0};
5656

57-
/// Total cost of the operation.
57+
/// Total cost of the a RelationOp or Plan.
5858
float cost() const {
59-
return setupCost + inputCardinality * unitCost;
59+
return setupCost + unitCost * inputCardinality;
6060
}
6161

62-
/// Total cost including cost of shuffling the output rows.
62+
/// Returns the estimated number of result rows.
63+
float resultCardinality() const {
64+
return fanout * inputCardinality;
65+
}
66+
67+
/// Total cost a RelationOp or Plan plus cost of shuffling the result rows.
6368
/// TODO: I'm not sure our shuffle cost model is right.
6469
/// Because broadcast vs gather vs partitioned cost are same now.
6570
float cost(float shuffleCostPerRow) const {
66-
const auto outputCardinality = inputCardinality * fanout;
67-
return cost() + outputCardinality * shuffleCostPerRow;
71+
return cost() + shuffleCostPerRow * resultCardinality();
6872
}
6973

7074
// Estimate of total data volume for a hash join build or group/order
@@ -79,8 +83,6 @@ struct Cost {
7983
// amount of spill is 'totalBytes' - 'peakResidentBytes'.
8084
float peakResidentBytes{0};
8185

82-
void add(const Cost& other);
83-
8486
/// If 'isUnit' shows the cost/cardinality for one row, else for
8587
/// 'inputCardinality' rows.
8688
std::string toString(bool detail, bool isUnit = false) const;
@@ -204,7 +206,7 @@ class RelationOp {
204206

205207
/// Returns the number of output rows.
206208
float resultCardinality() const {
207-
return cost_.inputCardinality * cost_.fanout;
209+
return cost_.resultCardinality();
208210
}
209211

210212
float inputCardinality() const {

axiom/optimizer/Schema.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ std::optional<bool> Distribution::maybeNeedsShuffle(
371371
}
372372

373373
bool Distribution::needsShuffle(const Distribution& desired) const {
374-
if (auto needsShuffle = maybeNeedsShuffle(desired)) {
374+
if (const auto needsShuffle = maybeNeedsShuffle(desired)) {
375375
return *needsShuffle;
376376
}
377377
// TODO: Future partition type should decide this.

axiom/optimizer/ToVelox.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,8 +1488,7 @@ void ToVelox::makePredictionAndHistory(
14881488
const velox::core::PlanNodeId& id,
14891489
const RelationOp* op) {
14901490
nodeHistory_[id] = op->historyKey();
1491-
prediction_[id] = NodePrediction{
1492-
.cardinality = op->cost().inputCardinality * op->cost().fanout};
1491+
prediction_[id] = NodePrediction{.cardinality = op->resultCardinality()};
14931492
}
14941493

14951494
velox::core::PlanNodePtr ToVelox::makeFragment(

0 commit comments

Comments
 (0)