Skip to content

Commit 56364bc

Browse files
committed
Small refactoring for better readability based on disscussions with Andrey and Pavel
1 parent a83ae18 commit 56364bc

File tree

6 files changed

+29
-32
lines changed

6 files changed

+29
-32
lines changed

axiom/optimizer/Optimization.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,7 @@ void Optimization::addJoin(
13031303
if (toTry.size() == 2 && candidate.tables.size() == 1) {
13041304
VELOX_DCHECK(!options_.syntacticJoinOrder);
13051305
if (toTry[0].isWorse(toTry[1])) {
1306-
std::swap(toTry[0], toTry[1]);
1306+
toTry[0] = std::move(toTry[1]);
13071307
toTry.pop_back();
13081308
} else if (toTry[1].isWorse(toTry[0])) {
13091309
toTry.pop_back();
@@ -1548,9 +1548,11 @@ PlanP unionPlan(
15481548
for (auto i = 1; i < states.size(); ++i) {
15491549
const auto& otherCost = states[i].cost;
15501550
fullyImported.intersect(inputPlans[i]->fullyImported);
1551-
firstState.cost.add(otherCost);
1552-
// The input cardinality is not additive, the fanout and other metrics are.
1553-
firstState.cost.inputCardinality -= otherCost.inputCardinality;
1551+
// The input cardinality is not additive.
1552+
firstState.cost.setupCost += otherCost.setupCost;
1553+
firstState.cost.unitCost += otherCost.unitCost;
1554+
firstState.cost.totalBytes += otherCost.totalBytes;
1555+
firstState.cost.transferBytes += otherCost.transferBytes;
15541556
}
15551557
if (distinct) {
15561558
firstState.addCost(*distinct);

axiom/optimizer/Plan.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ std::string Plan::toString(bool detail) const {
114114
}
115115

116116
void PlanState::addCost(RelationOp& op) {
117-
cost.unitCost += cost.inputCardinality * cost.fanout * op.cost().unitCost;
117+
VELOX_DCHECK_EQ(cost.inputCardinality, 1);
118118
cost.setupCost += op.cost().setupCost;
119+
cost.unitCost += cost.fanout * op.cost().unitCost;
119120
cost.fanout *= op.cost().fanout;
120121
cost.totalBytes += op.cost().totalBytes;
121122
cost.transferBytes += op.cost().transferBytes;
@@ -339,6 +340,7 @@ PlanP PlanSet::addPlan(RelationOpPtr plan, PlanState& state) {
339340
}
340341

341342
PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
343+
// TODO: Consider desired order here too.
342344
PlanP best = nullptr;
343345
PlanP match = nullptr;
344346
float bestCost = -1;
@@ -347,8 +349,7 @@ PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
347349
const bool single = isSingleWorker();
348350

349351
for (const auto& plan : plans) {
350-
const float cost =
351-
plan->cost.fanout * plan->cost.unitCost + plan->cost.setupCost;
352+
const float cost = plan->cost.cost();
352353

353354
auto update = [&](PlanP& current, float& currentCost) {
354355
if (!current || cost < currentCost) {
@@ -370,8 +371,9 @@ PlanP PlanSet::best(const Distribution& desired, bool& needsShuffle) {
370371
}
371372

372373
if (match) {
373-
const float shuffle = shuffleCost(best->op->columns()) * best->cost.fanout;
374-
if (matchCost <= bestCost + shuffle) {
374+
const float bestCostWithShuffle =
375+
best->cost.cost(shuffleCost(best->op->columns()));
376+
if (matchCost <= bestCostWithShuffle) {
375377
return match;
376378
}
377379
}

axiom/optimizer/RelationOp.cpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,6 @@
2424
#include "velox/expression/ScopedVarSetter.h"
2525

2626
namespace facebook::axiom::optimizer {
27-
28-
void Cost::add(const Cost& other) {
29-
inputCardinality += other.inputCardinality;
30-
fanout += other.fanout;
31-
setupCost += other.setupCost;
32-
}
33-
3427
namespace {
3528

3629
const auto& relTypeNames() {
@@ -216,8 +209,7 @@ void RelationOp::printCost(bool detail, std::stringstream& out) const {
216209
auto ctx = queryCtx();
217210
if (ctx && ctx->contextPlan()) {
218211
auto plan = ctx->contextPlan();
219-
auto totalCost = plan->cost.unitCost + plan->cost.setupCost;
220-
auto pct = 100 * cost_.inputCardinality * cost_.unitCost / totalCost;
212+
auto pct = 100 * cost_.cost() / plan->cost.cost();
221213
out << " " << std::fixed << std::setprecision(2) << pct << "% ";
222214
}
223215
if (detail) {
@@ -353,7 +345,7 @@ Join::Join(
353345
cost_.inputCardinality = inputCardinality();
354346
cost_.fanout = fanout;
355347

356-
const float buildSize = right->cost().inputCardinality;
348+
const float buildSize = right->resultCardinality();
357349
const auto numRightColumns =
358350
static_cast<float>(right->input()->columns().size());
359351
auto rowCost = numRightColumns * Costs::kHashExtractColumnCost;
@@ -535,7 +527,7 @@ Aggregation::Aggregation(
535527
// being unique after n values is 1 - (1/d)^n.
536528
auto nOut = cardinality -
537529
cardinality *
538-
std::pow(1.0F - (1.0F / cardinality), input_->resultCardinality());
530+
std::pow(1.0F - (1.0F / cardinality), cost_.inputCardinality);
539531

540532
cost_.fanout = nOut / cost_.inputCardinality;
541533
const auto numGrouppingKeys = static_cast<float>(groupingKeys.size());
@@ -802,8 +794,7 @@ UnionAll::UnionAll(RelationOpPtrVector inputsVector)
802794
: RelationOp{RelType::kUnionAll, nullptr, Distribution{}, inputsVector[0]->columns()},
803795
inputs{std::move(inputsVector)} {
804796
for (auto& input : inputs) {
805-
cost_.inputCardinality +=
806-
input->cost().inputCardinality * input->cost().fanout;
797+
cost_.inputCardinality += input->resultCardinality();
807798
}
808799

809800
cost_.fanout = 1;

axiom/optimizer/RelationOp.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,21 @@ struct Cost {
5454
// or index access.
5555
float setupCost{0};
5656

57-
/// Total cost of the operation.
57+
/// Returns the estimated number of result rows.
58+
float resultCardinality() const {
59+
return inputCardinality * fanout;
60+
}
61+
62+
/// Total cost of the a RelationOp or Plan.
5863
float cost() const {
5964
return setupCost + inputCardinality * unitCost;
6065
}
6166

62-
/// Total cost including cost of shuffling the output rows.
67+
/// Total cost a RelationOp or Plan plus cost of shuffling the result rows.
6368
/// TODO: I'm not sure our shuffle cost model is right.
6469
/// Because broadcast vs gather vs partitioned cost are same now.
6570
float cost(float shuffleCostPerRow) const {
66-
const auto outputCardinality = inputCardinality * fanout;
67-
return cost() + outputCardinality * shuffleCostPerRow;
71+
return cost() + resultCardinality() * shuffleCostPerRow;
6872
}
6973

7074
// Estimate of total data volume for a hash join build or group/order
@@ -79,8 +83,6 @@ struct Cost {
7983
// amount of spill is 'totalBytes' - 'peakResidentBytes'.
8084
float peakResidentBytes{0};
8185

82-
void add(const Cost& other);
83-
8486
/// If 'isUnit' shows the cost/cardinality for one row, else for
8587
/// 'inputCardinality' rows.
8688
std::string toString(bool detail, bool isUnit = false) const;
@@ -204,7 +206,7 @@ class RelationOp {
204206

205207
/// Returns the number of output rows.
206208
float resultCardinality() const {
207-
return cost_.inputCardinality * cost_.fanout;
209+
return cost_.resultCardinality();
208210
}
209211

210212
float inputCardinality() const {

axiom/optimizer/ToVelox.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,8 +1488,7 @@ void ToVelox::makePredictionAndHistory(
14881488
const velox::core::PlanNodeId& id,
14891489
const RelationOp* op) {
14901490
nodeHistory_[id] = op->historyKey();
1491-
prediction_[id] = NodePrediction{
1492-
.cardinality = op->cost().inputCardinality * op->cost().fanout};
1491+
prediction_[id] = NodePrediction{.cardinality = op->resultCardinality()};
14931492
}
14941493

14951494
velox::core::PlanNodePtr ToVelox::makeFragment(

axiom/optimizer/tests/TpchPlanTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class TpchPlanTest : public virtual test::HiveQueriesTestBase {
6464
}
6565

6666
static std::string readSqlFromFile(const std::string& filePath) {
67-
auto path = velox::test::getDataFilePath("axiom/optimizer/tests", filePath);
67+
auto path =
68+
velox::test::getDataFilePath("", "axiom/optimizer/tests/" + filePath);
6869
std::ifstream inputFile(path, std::ifstream::binary);
6970

7071
VELOX_CHECK(inputFile, "Failed to open SQL file: {}", path);

0 commit comments

Comments
 (0)