Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions axiom/optimizer/Cost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ void History::updateFromFile(const std::string& path) {
}

/// Returns the shuffle cost for 'columns', derived from byteSize(columns).
/// NOTE(review): this body holds two return statements; as written only the
/// first one runs and the one scaled by Costs::kByteShuffleCost is
/// unreachable. Presumably these are the before/after lines of a diff --
/// confirm which one is intended.
float shuffleCost(const ColumnVector& columns) {
return byteSize(columns);
return byteSize(columns) * Costs::kByteShuffleCost;
}

/// Returns the shuffle cost for 'exprs', derived from byteSize(exprs).
/// NOTE(review): this body holds two return statements; as written only the
/// first one runs and the one scaled by Costs::kByteShuffleCost is
/// unreachable. Presumably these are the before/after lines of a diff --
/// confirm which one is intended.
float shuffleCost(const ExprVector& exprs) {
return byteSize(exprs);
return byteSize(exprs) * Costs::kByteShuffleCost;
}

float selfCost(ExprCP expr) {
Expand Down
29 changes: 25 additions & 4 deletions axiom/optimizer/Cost.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ struct NodePrediction {
float cardinality;
/// Peak total memory for the top node.
float peakMemory{0};
/// CPU estimate in optimizer internal units.
float cpu{0};
};

/// Interface to historical query cost and cardinality
Expand Down Expand Up @@ -103,23 +105,42 @@ struct Costs {
: kLargeHashCost;
}

static float hashBuildCost(float cardinality) {
  // Building writes each row once and reads it at least once. Counting a
  // write as roughly two reads, a build costs about three probes per row.
  constexpr float kProbesPerBuiltRow = 3;
  return kProbesPerBuiltRow * hashProbeCost(cardinality);
}

static constexpr float kKeyCompareCost =
6; // ~30 instructions to find, decode and compare
static constexpr float kArrayProbeCost = 2; // ~10 instructions.
static constexpr float kSmallHashCost = 10; // 50 instructions
static constexpr float kLargeHashCost = 40; // 2 LLC misses
static constexpr float kSmallHashCost = 4; // 50 instructions
static constexpr float kLargeHashCost = 12; // 2 LLC misses
static constexpr float kColumnRowCost = 5;
static constexpr float kColumnByteCost = 0.1;

/// Cost of hash function on one column.
static constexpr float kHashColumnCost = 0.5;
static constexpr float kHashColumnCost = 0.3;

/// Cost of getting a column from a hash table
static constexpr float kHashExtractColumnCost = 0.5;
static constexpr float kHashExtractColumnCost = 0.3;

/// Cost of sum/min/max. A little more than getting a value from a hash
/// table.
static constexpr float kSimpleAggregateCost = kHashExtractColumnCost * 1.5;

/// Bytes of overhead for a hash table row: ~12 bytes for the table and ~12
/// bytes for the row.
static constexpr float kHashRowBytes = 24;

/// Minimal cost of calling a filter function, e.g. comparing two numeric
/// expressions.
static constexpr float kMinimumFilterCost = 2;

// Multiplier to apply to shuffle byte volume to get CPU cost. A
// complete cost model will need to consider the count of
// destinations, number of partition keys etc.
static constexpr float kByteShuffleCost = 0.3;
};

/// Returns shuffle cost for a single row. Depends on the number of types of
Expand Down
33 changes: 31 additions & 2 deletions axiom/optimizer/DerivedTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,9 @@ void DerivedTable::import(
PlanObjectCP firstTable,
const PlanObjectSet& superTables,
const std::vector<PlanObjectSet>& existences,
float existsFanout) {
float existsFanout,
PlanObjectSet extraConjuncts,
PlanObjectSet projected) {
tableSet = superTables;
tables = superTables.toObjects();

Expand Down Expand Up @@ -342,11 +344,24 @@ void DerivedTable::import(
noImportOfExists = true;
}

conjuncts = extraConjuncts.toObjects<Expr>();

if (firstTable->is(PlanType::kDerivedTableNode)) {
importJoinsIntoFirstDt(firstTable->as<DerivedTable>());
} else {
fullyImported = superTables;
}

// If there is more than one table or the first table is a derived table,
// columns should be explicitly projected out.
if (tables.size() > 1 || tables[0]->is(PlanType::kDerivedTableNode)) {
projected.forEach<Column>([&](auto column) {
if (std::find(columns.begin(), columns.end(), column) == columns.end()) {
exprs.push_back(column);
columns.push_back(column);
}
});
}
linkTablesToJoins();
}

Expand Down Expand Up @@ -489,6 +504,12 @@ ExprCP replaceInputs(ExprCP expr, const T& source, const U& target) {

} // namespace

/// Returns true when 'tables' holds exactly one entry, that entry is a derived
/// table, and this DT has no limit, order by, conjuncts, aggregation or exprs
/// of its own.
bool DerivedTable::isWrapOnly() const {
  // Must wrap exactly one derived table.
  if (tables.size() != 1) {
    return false;
  }
  if (!tables[0]->is(PlanType::kDerivedTableNode)) {
    return false;
  }

  // Any operation layered on top of the wrapped table disqualifies it.
  if (hasLimit() || hasOrderBy()) {
    return false;
  }
  if (!conjuncts.empty()) {
    return false;
  }
  if (hasAggregation()) {
    return false;
  }
  return exprs.empty();
}

/// Returns the result of replaceInputs() on 'expr', with this DT's 'exprs' as
/// the source and its 'columns' as the target.
ExprCP DerivedTable::exportExpr(ExprCP expr) {
  ExprCP exported = replaceInputs(expr, exprs, columns);
  return exported;
}
Expand All @@ -498,7 +519,7 @@ ExprCP DerivedTable::importExpr(ExprCP expr) {
}

void DerivedTable::importJoinsIntoFirstDt(const DerivedTable* firstDt) {
if (tables.size() == 1 && tables[0]->is(PlanType::kDerivedTableNode)) {
if (isWrapOnly()) {
flattenDt(tables[0]->as<DerivedTable>());
return;
}
Expand All @@ -518,6 +539,8 @@ void DerivedTable::importJoinsIntoFirstDt(const DerivedTable* firstDt) {
}

auto* newFirst = make<DerivedTable>(*firstDt->as<DerivedTable>());
newFirst->cname = firstDt->as<DerivedTable>()->cname;
int32_t previousNumJoins = newFirst->joins.size();
for (auto& join : joins) {
auto other = join->otherSide(firstDt);
if (!other) {
Expand Down Expand Up @@ -557,6 +580,7 @@ void DerivedTable::importJoinsIntoFirstDt(const DerivedTable* firstDt) {
}
} else {
auto* chainDt = make<DerivedTable>();
chainDt->cname = toName(fmt::format("rdt{}", chainDt->id()));
PlanObjectSet chainSet;
chainSet.add(other);
if (fullyImported) {
Expand All @@ -583,6 +607,9 @@ void DerivedTable::importJoinsIntoFirstDt(const DerivedTable* firstDt) {
}
}

for (auto i = previousNumJoins; i < newFirst->joins.size(); ++i) {
newFirst->joins[i]->guessFanout();
}
VELOX_CHECK_EQ(tables.size(), 1);
for (auto i = 0; i < initialTables.size(); ++i) {
if (!newFirst->fullyImported.contains(initialTables[i])) {
Expand All @@ -605,6 +632,8 @@ void DerivedTable::flattenDt(const DerivedTable* dt) {
importedExistences.unionSet(dt->importedExistences);
aggregation = dt->aggregation;
having = dt->having;
limit = dt->limit;
offset = dt->offset;
}

void DerivedTable::makeProjection(const ExprVector& exprs) {
Expand Down
8 changes: 7 additions & 1 deletion axiom/optimizer/DerivedTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ struct DerivedTable : public PlanObject {
PlanObjectCP firstTable,
const PlanObjectSet& superTables,
const std::vector<PlanObjectSet>& existences,
float existsFanout = 1);
float existsFanout = 1,
PlanObjectSet extraConjuncts = PlanObjectSet(),
PlanObjectSet columns = PlanObjectSet());

/// Return a copy of 'expr', replacing references to this DT's 'columns' with
/// corresponding 'exprs'.
Expand Down Expand Up @@ -212,6 +214,10 @@ struct DerivedTable : public PlanObject {
return limit >= 0;
}

// True if 'tables' contains exactly one derived table and this DT makes no
// change to its result set.
bool isWrapOnly() const;

void addJoinedBy(JoinEdgeP join);

/// Memoizes plans for 'this' and fills in 'cardinality'. Needed before adding
Expand Down
Loading