@@ -102,12 +102,6 @@ Plan::Plan(RelationOpPtr op, const PlanState& state)
102102 columns(exprColumns(state.targetExprs)),
103103 fullyImported(state.dt->fullyImported) {}
104104
105- bool Plan::isStateBetter (const PlanState& state, float perRowMargin) const {
106- return cost.unitCost * cost.inputCardinality + cost.setupCost >
107- state.cost .unitCost * state.cost .inputCardinality + state.cost .setupCost +
108- perRowMargin * state.cost .fanout ;
109- }
110-
111105std::string Plan::printCost () const {
112106 return cost.toString (true , false );
113107}
@@ -120,9 +114,10 @@ std::string Plan::toString(bool detail) const {
120114}
121115
122116void PlanState::addCost (RelationOp& op) {
123- cost.unitCost += cost. inputCardinality * cost. fanout * op. cost (). unitCost ;
117+ VELOX_DCHECK_EQ ( cost.inputCardinality , 1 ) ;
124118 cost.setupCost += op.cost ().setupCost ;
125- cost.fanout *= op.cost ().fanout ;
119+ cost.unitCost += op.cost ().unitCost * op.cost ().inputCardinality ;
120+ cost.fanout = op.cost ().resultCardinality ();
126121 cost.totalBytes += op.cost ().totalBytes ;
127122 cost.transferBytes += op.cost ().transferBytes ;
128123}
@@ -276,74 +271,76 @@ std::string PlanState::printPlan(RelationOpPtr op, bool detail) const {
276271}
277272
278273PlanP PlanSet::addPlan (RelationOpPtr plan, PlanState& state) {
279- int32_t replaceIndex = -1 ;
280- const float shuffleCostPerRow =
281- shuffleCost (plan->columns ()) * state.cost .fanout ;
282-
283- if (!plans.empty ()) {
284- // Compare with existing. If there is one with same distribution and new is
285- // better, replace. If there is one with a different distribution and the
286- // new one can produce the same distribution by repartition, for cheaper,
287- // add the new one and delete the old one.
288- for (auto i = 0 ; i < plans.size (); ++i) {
289- auto old = plans[i].get ();
290- if (state.input != old->input ) {
291- continue ;
292- }
274+ const float shuffleCostPerRow = shuffleCost (plan->columns ());
293275
294- const bool newIsBetter = old->isStateBetter (state);
295- const bool newIsBetterWithShuffle =
296- old->isStateBetter (state, shuffleCostPerRow);
297- const bool sameDist =
298- old->op ->distribution ().isSamePartition (plan->distribution ());
299- const bool sameOrder =
300- old->op ->distribution ().isSameOrder (plan->distribution ());
301- if (sameDist && sameOrder) {
302- if (newIsBetter) {
303- replaceIndex = i;
304- continue ;
305- }
306- // There's a better one with same dist and partition.
307- return nullptr ;
308- }
276+ // Determine whether the old plan is worse than the new one in all aspects.
277+ auto isWorse = [&](const Plan& old) {
278+ if (plan->distribution ().needsSort (old.op ->distribution ())) {
279+ // New plan needs a sort to match the old one, so cannot compare.
280+ return false ;
281+ }
282+ const bool needsShuffle =
283+ plan->distribution ().needsShuffle (old.op ->distribution ());
284+ return old.cost .cost () >
285+ state.cost .cost (needsShuffle ? shuffleCostPerRow : 0 );
286+ };
309287
310- if (newIsBetterWithShuffle && old->op ->distribution ().orderKeys .empty ()) {
311- // Old plan has no order and is worse than new plus shuffle. Can't win.
312- // Erase.
313- queryCtx ()->optimization ()->trace (
314- OptimizerOptions::kExceededBest ,
315- state.dt ->id (),
316- old->cost ,
317- *old->op );
318- plans.erase (plans.begin () + i);
319- --i;
320- continue ;
321- }
288+ // Determine whether the old plan is better than the new one in all aspects.
289+ auto isBetter = [&](const Plan& old) {
290+ if (old.op ->distribution ().needsSort (plan->distribution ())) {
291+ // Old plan needs a sort to match the new one, so cannot compare.
292+ return false ;
293+ }
294+ const bool needsShuffle =
295+ old.op ->distribution ().needsShuffle (plan->distribution ());
296+ return state.cost .cost () >
297+ old.cost .cost (needsShuffle ? shuffleCost (old.op ->columns ()) : 0 );
298+ };
322299
323- if (plan->distribution ().orderKeys .empty () &&
324- !old->isStateBetter (state, -shuffleCostPerRow)) {
325- // New has no order and old would beat it even after adding shuffle.
326- return nullptr ;
327- }
300+ // Compare with existing plans.
301+ const auto plansSize = plans.size ();
302+ enum {
303+ kFoundWorse = -1 ,
304+ kNone = 0 ,
305+ kFoundBetter = 1 ,
306+ };
307+ auto found = kNone ;
308+ for (size_t i = 0 ; i < plans.size (); ++i) {
309+ const auto & old = *plans[i];
310+ if (old.input != state.input ) {
311+ // Plans have different inputs, cannot compare.
312+ continue ;
328313 }
314+ if (isWorse (old)) {
315+ // Remove old plan, it is worse than the new one in all aspects.
316+ queryCtx ()->optimization ()->trace (
317+ OptimizerOptions::kExceededBest , state.dt ->id (), old.cost , *old.op );
318+ std::swap (plans[i], plans.back ());
319+ plans.pop_back ();
320+ --i;
321+ found = kFoundWorse ;
322+ } else if (found == kNone && isBetter (old)) {
323+ // Old plan is better than the new one in all aspects.
324+ found = kFoundBetter ;
325+ }
326+ }
327+ if (found == kFoundBetter ) {
328+ // No existing plan was worse than the new one in all aspects,
329+ // and at least one existing plan is better than the new one in all aspects.
330+ // So don't add the new plan.
331+ return nullptr ;
329332 }
330333
331334 auto newPlan = std::make_unique<Plan>(std::move (plan), state);
332335 auto * result = newPlan.get ();
333- auto newPlanCost =
334- result->cost .unitCost + result->cost .setupCost + shuffleCostPerRow;
335- if (bestCostWithShuffle == 0 || newPlanCost < bestCostWithShuffle) {
336- bestCostWithShuffle = newPlanCost;
337- }
338- if (replaceIndex >= 0 ) {
339- plans[replaceIndex] = std::move (newPlan);
340- } else {
341- plans.push_back (std::move (newPlan));
342- }
336+ bestCostWithShuffle =
337+ std::min (bestCostWithShuffle, result->cost .cost (shuffleCostPerRow));
338+ plans.push_back (std::move (newPlan));
343339 return result;
344340}
345341
346- PlanP PlanSet::best (const Distribution& distribution, bool & needsShuffle) {
342+ PlanP PlanSet::best (const Distribution& desired, bool & needsShuffle) {
343+ // TODO: Consider desired order here too.
347344 PlanP best = nullptr ;
348345 PlanP match = nullptr ;
349346 float bestCost = -1 ;
@@ -352,8 +349,7 @@ PlanP PlanSet::best(const Distribution& distribution, bool& needsShuffle) {
352349 const bool single = isSingleWorker ();
353350
354351 for (const auto & plan : plans) {
355- const float cost =
356- plan->cost .fanout * plan->cost .unitCost + plan->cost .setupCost ;
352+ const float cost = plan->cost .cost ();
357353
358354 auto update = [&](PlanP& current, float & currentCost) {
359355 if (!current || cost < currentCost) {
@@ -363,7 +359,7 @@ PlanP PlanSet::best(const Distribution& distribution, bool& needsShuffle) {
363359 };
364360
365361 update (best, bestCost);
366- if (!single && plan->op ->distribution ().isSamePartition (distribution )) {
362+ if (!single && ! plan->op ->distribution ().needsShuffle (desired )) {
367363 update (match, matchCost);
368364 }
369365 }
@@ -375,8 +371,9 @@ PlanP PlanSet::best(const Distribution& distribution, bool& needsShuffle) {
375371 }
376372
377373 if (match) {
378- const float shuffle = shuffleCost (best->op ->columns ()) * best->cost .fanout ;
379- if (matchCost <= bestCost + shuffle) {
374+ const float bestCostWithShuffle =
375+ best->cost .cost (shuffleCost (best->op ->columns ()));
376+ if (matchCost <= bestCostWithShuffle) {
380377 return match;
381378 }
382379 }
@@ -489,12 +486,14 @@ std::string JoinCandidate::toString() const {
489486}
490487
491488bool NextJoin::isWorse (const NextJoin& other) const {
492- float shuffle =
493- plan->distribution ().isSamePartition (other.plan ->distribution ())
494- ? 0
495- : plan->cost ().fanout * shuffleCost (plan->columns ());
496- return cost.unitCost + cost.setupCost + shuffle >
497- other.cost .unitCost + other.cost .setupCost ;
489+ if (other.plan ->distribution ().needsSort (plan->distribution ())) {
490+ // 'other' needs a sort to match 'plan', so cannot compare.
491+ return false ;
492+ }
493+ const auto needsShuffle =
494+ other.plan ->distribution ().needsShuffle (plan->distribution ());
495+ return cost.cost () >
496+ other.cost .cost (needsShuffle ? shuffleCost (other.plan ->columns ()) : 0 );
498497}
499498
500499size_t MemoKey::hash () const {
0 commit comments