Skip to content

Commit 5c9f9dd

Browse files
Ke Wang and meta-codesync[bot]
authored and committed
fix: Add missing metadataFilter_ transfer for split prefetch (#15385)
Summary: Pull Request resolved: #15385 We're seeing a crash when prefetch is turned on, ASAN log P2022180855. In `setFromDataSource`: 1. `scanSpec_ = std::move(source->scanSpec_);` - The scanSpec is moved from the old source to the new one 2. The `scanSpec_` contains raw pointers to Filter objects owned by `source->metadataFilter_` 3. But `metadataFilter_` is **NOT** transferred from source to this 4. When `sourceUnique` (the old source) is destroyed at the end of the function, `source->metadataFilter_` is destroyed 5. The `scanSpec_` in the new HiveDataSource now has **dangling pointers** to the destroyed Filters 6. Later async operations try to use these pointers → heap-use-after-free Reviewed By: Yuhta Differential Revision: D86178967 fbshipit-source-id: 26bdd74a1339f38472ada0ef2308624898c71301
1 parent e174b46 commit 5c9f9dd

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

velox/connectors/hive/HiveDataSource.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ void HiveDataSource::setFromDataSource(
505505
readerOutputType_ = std::move(source->readerOutputType_);
506506
source->scanSpec_->moveAdaptationFrom(*scanSpec_);
507507
scanSpec_ = std::move(source->scanSpec_);
508+
metadataFilter_ = std::move(source->metadataFilter_);
508509
splitReader_ = std::move(source->splitReader_);
509510
splitReader_->setConnectorQueryCtx(connectorQueryCtx_);
510511
// New io will be accounted on the stats of 'source'. Add the existing

velox/exec/tests/TableScanTest.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4318,6 +4318,53 @@ TEST_F(TableScanTest, parallelPrepare) {
43184318
.copyResults(pool_.get());
43194319
}
43204320

4321+
TEST_F(TableScanTest, parallelPrepareWithSubfieldFilters) {
  // Regression test for split prefetch: subfield/metadata filters must stay
  // valid when a prefetched data source is adopted (metadataFilter_ transfer).
  constexpr int32_t kNumSplits = 100;

  // Two columns: c0 = row index, c1 = 2 * row index.
  const auto rowData = makeRowVector(
      {makeFlatVector<int32_t>(100, [](auto row) { return row; }),
       makeFlatVector<int64_t>(100, [](auto row) { return row * 2; })});

  const auto tempFile = TempFilePath::create();
  writeToFile(tempFile->getPath(), {rowData});

  const auto outputType = ROW({"c0", "c1"}, {INTEGER(), BIGINT()});

  // Subfield filters (pushed into the scan spec) plus a remaining filter.
  auto subfieldFilters = SubfieldFiltersBuilder()
                             .add("c0", greaterThanOrEqual(10))
                             .add("c1", lessThan(150))
                             .build();
  const auto remainingFilter = parseExpr("c0 % 2 = 0", outputType);
  const auto tableHandle =
      makeTableHandle(std::move(subfieldFilters), remainingFilter);
  const auto assignments = allRegularColumns(outputType);

  const auto plan = exec::test::PlanBuilder(pool_.get())
                        .startTableScan()
                        .outputType(outputType)
                        .tableHandle(tableHandle)
                        .assignments(assignments)
                        .endTableScan()
                        .planNode();

  // Many identical splits so split preloading kicks in aggressively.
  std::vector<exec::Split> splits;
  splits.reserve(kNumSplits);
  for (auto i = 0; i < kNumSplits; ++i) {
    splits.push_back(makeHiveSplit(tempFile->getPath()));
  }

  const auto result = AssertQueryBuilder(plan)
                          .config(
                              core::QueryConfig::kMaxSplitPreloadPerDriver,
                              std::to_string(kNumSplits))
                          .splits(splits)
                          .copyResults(pool_.get());

  // Expected rows per split: c0 >= 10 AND c1 < 150 (i.e. c0 < 75) AND
  // c0 % 2 = 0 → even c0 in [10, 74] → 33 rows.
  ASSERT_EQ(result->size(), 33 * kNumSplits);
}
4367+
43214368
TEST_F(TableScanTest, dictionaryMemo) {
43224369
constexpr int kSize = 100;
43234370
const char* baseStrings[] = {

0 commit comments

Comments
 (0)