Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion axiom/optimizer/DerivedTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ void DerivedTable::linkTablesToJoins() {
// from all the tables it depends on.
for (auto join : joins) {
PlanObjectSet tables;
if (join->isInner() && join->directed()) {
if (join->directed()) {
tables.add(join->leftTable());
} else {
for (auto key : join->leftKeys()) {
Expand Down Expand Up @@ -900,6 +900,19 @@ void DerivedTable::distributeConjuncts() {
tables[0]->as<DerivedTable>()->setOp.value() ==
logical_plan::SetOperation::kUnionAll));

PlanObjectSet noPushdownTables;
for (const auto* join : joins) {
if (join->leftOptional()) {
// No pushdown to the left side of a RIGHT or FULL join.
noPushdownTables.add(join->leftTable());
}
if (join->rightOptional()) {
// No pushdown to the right side of a LEFT or FULL join.
noPushdownTables.add(join->rightTable());
}
}
VELOX_DCHECK(tables.size() > 1 || noPushdownTables.empty());

for (auto i = 0; i < conjuncts.size(); ++i) {
// No pushdown of non-deterministic except if only pushdown target is a
// union all.
Expand All @@ -924,6 +937,10 @@ void DerivedTable::distributeConjuncts() {
continue; // UnnestTable does not have filter push-down.
}

if (noPushdownTables.contains(tables[0])) {
continue; // No pushdown if depends on an optional side of a join.
}

if (tables[0]->is(PlanType::kDerivedTableNode)) {
// Translate the column names and add the condition to the conjuncts in
// the dt. If the inner is a set operation, add the filter to children.
Expand Down
23 changes: 21 additions & 2 deletions axiom/optimizer/Optimization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ void reducingJoinsRecursive(
resultFunc = {}) {
bool isLeaf = true;
for (auto join : joinedBy(candidate)) {
if (join->leftOptional() || join->rightOptional()) {
if (!join->isInner()) {
continue;
}
JoinSide other = join->sideOf(candidate, true);
Expand Down Expand Up @@ -1377,7 +1377,6 @@ void Optimization::addJoin(
joinByHash(plan, candidate, state, toTry);

if (!options_.syntacticJoinOrder && toTry.size() > sizeAfterIndex &&
candidate.join->isNonCommutative() &&
candidate.join->hasRightHashVariant()) {
// There is a hash based candidate with a non-commutative join. Try a right
// join variant.
Expand Down Expand Up @@ -1498,6 +1497,19 @@ bool Optimization::placeConjuncts(
state.dt->singleRowDts.forEach<DerivedTable>(
[&](auto dt) { columnsAndSingles.unionObjects(dt->columns); });

PlanObjectSet noPushdownTables;
if (!allowNondeterministic) {
for (const auto* join : state.dt->joins) {
if (join->leftOptional()) {
// No pushdown to the left side of a RIGHT or FULL join.
noPushdownTables.add(join->leftTable());
}
if (join->rightOptional()) {
// No pushdown to the right side of a LEFT or FULL join.
noPushdownTables.add(join->rightTable());
}
}
}
ExprVector filters;
for (auto& conjunct : state.dt->conjuncts) {
if (!allowNondeterministic && conjunct->containsNonDeterministic()) {
Expand All @@ -1506,6 +1518,13 @@ bool Optimization::placeConjuncts(
if (state.placed.contains(conjunct)) {
continue;
}
if (!allowNondeterministic) {
const auto allTables = conjunct->allTables();
if (allTables.size() == 1 &&
noPushdownTables.contains(allTables.toObjects()[0])) {
continue;
}
}
if (conjunct->columns().isSubset(state.columns)) {
state.columns.add(conjunct);
filters.push_back(conjunct);
Expand Down
3 changes: 3 additions & 0 deletions axiom/optimizer/PlanObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,18 +150,21 @@ class PlanObjectSet : public BitSet {
public:
/// True if id of 'object' is in 'this'.
bool contains(PlanObjectCP object) const {
VELOX_DCHECK_NOT_NULL(object);
return object->id() < bits_.size() * 64 &&
velox::bits::isBitSet(bits_.data(), object->id());
}

/// Inserts id of 'object'.
void add(PlanObjectCP object) {
VELOX_DCHECK_NOT_NULL(object);
auto id = object->id();
BitSet::add(id);
}

/// Erases id of 'object'.
void erase(PlanObjectCP object) {
VELOX_DCHECK_NOT_NULL(object);
BitSet::erase(object->id());
}

Expand Down
13 changes: 7 additions & 6 deletions axiom/optimizer/QueryGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ std::string JoinEdge::toString() const {
out << "<join "
<< (leftTable_ ? leftTable_->toString() : " multiple tables ");
if (leftOptional_ && rightOptional_) {
out << " full outr ";
out << " full outer ";
} else if (markColumn_) {
out << " exists project ";
} else if (rightOptional_) {
Expand Down Expand Up @@ -472,18 +472,19 @@ PlanObjectSet JoinEdge::allTables() const {
}

namespace {
template <typename U>

inline CPSpan<Column> toRangeCast(const ExprVector& exprs) {
return CPSpan<Column>(
reinterpret_cast<const Column* const*>(exprs.data()), exprs.size());
return {reinterpret_cast<const Column* const*>(exprs.data()), exprs.size()};
}

} // namespace

void JoinEdge::guessFanout() {
if (fanoutsFixed_) {
return;
}

// TODO: Why fanouts are set to 1.1 and 1 when left table is null?
if (leftTable_ == nullptr) {
lrFanout_ = 1.1;
rlFanout_ = 1;
Expand All @@ -492,8 +493,8 @@ void JoinEdge::guessFanout() {

auto* opt = queryCtx()->optimization();
auto samplePair = opt->history().sampleJoin(this);
auto left = joinCardinality(leftTable_, toRangeCast<Column>(leftKeys_));
auto right = joinCardinality(rightTable_, toRangeCast<Column>(rightKeys_));
auto left = joinCardinality(leftTable_, toRangeCast(leftKeys_));
auto right = joinCardinality(rightTable_, toRangeCast(rightKeys_));
leftUnique_ = left.unique;
rightUnique_ = right.unique;
if (samplePair.first == 0 && samplePair.second == 0) {
Expand Down
26 changes: 14 additions & 12 deletions axiom/optimizer/QueryGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,9 @@ struct JoinSide {
if (isExists) {
return velox::core::JoinType::kLeftSemiFilter;
}
if (isOptional && isNonOptionalOfOuter) {
return velox::core::JoinType::kFull;
}
if (isOptional) {
return velox::core::JoinType::kLeft;
}
Expand Down Expand Up @@ -485,7 +488,11 @@ class JoinEdge {
rightNotExists_(spec.rightNotExists),
directed_(spec.directed),
markColumn_(spec.markColumn) {
VELOX_CHECK_NOT_NULL(rightTable);
if (leftOptional_ || !rightOptional_) {
// Only left join can have null left table.
VELOX_DCHECK_NOT_NULL(leftTable_);
}
VELOX_DCHECK_NOT_NULL(rightTable_);
// filter_ is only for non-inner joins.
VELOX_CHECK(filter_.empty() || !isInner());
}
Expand All @@ -508,7 +515,6 @@ class JoinEdge {
PlanObjectCP leftTable,
PlanObjectCP rightTable,
ExprVector unnestExprs) {
VELOX_DCHECK_NOT_NULL(leftTable);
auto* edge = make<JoinEdge>(leftTable, rightTable, Spec{.directed = true});
edge->leftKeys_ = std::move(unnestExprs);
// TODO Not sure to what values fanout need to be set,
Expand Down Expand Up @@ -577,7 +583,7 @@ class JoinEdge {
/// True if inner join.
bool isInner() const {
return !leftOptional_ && !rightOptional_ && !rightExists_ &&
!rightNotExists_ && !markColumn_;
!rightNotExists_ && !directed_ && !markColumn_;
}

bool isSemi() const {
Expand All @@ -592,18 +598,13 @@ class JoinEdge {
/// placing this.
bool isNonCommutative() const {
// Inner and full outer joins are commutative.
if (rightOptional_ && leftOptional_) {
return false;
}

return !leftTable_ || rightOptional_ || leftOptional_ || rightExists_ ||
rightNotExists_ || markColumn_ || directed_;
return !(isInner() || (leftOptional_ && rightOptional_));
}

/// True if has a hash based variant that builds on the left and probes on the
/// right.
bool hasRightHashVariant() const {
return isNonCommutative() && !rightNotExists_;
return isNonCommutative() && !isAnti() && !directed();
}

/// Returns the join side info for 'table'. If 'other' is set, returns the
Expand All @@ -614,9 +615,10 @@ class JoinEdge {
/// the returned table for one row in 'table'. If the join is not inner
/// returns {nullptr, 0}.
std::pair<PlanObjectCP, float> otherTable(PlanObjectCP table) const {
return leftTable_ == table && !leftOptional_
VELOX_DCHECK_NOT_NULL(table);
return leftTable_ == table
? std::pair<PlanObjectCP, float>{rightTable_, lrFanout_}
: rightTable_ == table && !rightOptional_ && !rightExists_
: rightTable_ == table && leftTable_
? std::pair<PlanObjectCP, float>{leftTable_, rlFanout_}
: std::pair<PlanObjectCP, float>{nullptr, 0};
}
Expand Down
Loading