Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion_common::config::{CsvOptions, JsonOptions};
use datafusion_common::{
exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err,
Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError,
TableReference, UnnestOptions,
unqualified_field_not_found, Column, DFSchema, DataFusionError, ParamValues,
ScalarValue, SchemaError, TableReference, UnnestOptions,
};
use datafusion_expr::select_expr::SelectExpr;
use datafusion_expr::{
Expand Down Expand Up @@ -310,11 +310,20 @@ impl DataFrame {
pub fn select_columns(self, columns: &[&str]) -> Result<DataFrame> {
let fields = columns
.iter()
.flat_map(|name| {
self.plan
.map(|name| {
let fields = self
.plan
.schema()
.qualified_fields_with_unqualified_name(name)
.qualified_fields_with_unqualified_name(name);
if fields.is_empty() {
Err(unqualified_field_not_found(name, self.plan.schema()))
} else {
Ok(fields)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we also have to check that we found all the fields?

Like what if some fields are found and some are not 🤔

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think this check will ensure we have all the fields. In fact, the unit test I added has 1 field that does exist and 2 that do not.

If a single one of the requested columns returns no fields, then the entire operation should fail. Is there a different case you're thinking of?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope -- this looks good to me

}
})
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.flatten()
.collect::<Vec<_>>();
let expr: Vec<Expr> = fields
.into_iter()
Expand Down
15 changes: 13 additions & 2 deletions datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ use datafusion::test_util::{
use datafusion_catalog::TableProvider;
use datafusion_common::test_util::{batches_to_sort_string, batches_to_string};
use datafusion_common::{
assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema,
DataFusionError, ScalarValue, TableReference, UnnestOptions,
assert_contains, internal_datafusion_err, internal_err, Constraint, Constraints,
DFSchema, DataFusionError, ScalarValue, TableReference, UnnestOptions,
};
use datafusion_common_runtime::SpawnedTask;
use datafusion_datasource::file_format::format_as_file_type;
Expand Down Expand Up @@ -305,6 +305,17 @@ async fn select_columns() -> Result<()> {
Ok(())
}

#[tokio::test]
async fn select_columns_with_nonexistent_columns() -> Result<()> {
let t = test_table().await?;
let t2 = t.select_columns(&["canada", "c2", "rocks"]);
let Err(DataFusionError::SchemaError(_, _)) = t2 else {
return internal_err!("select_columns with nonexistent columns should error");
};

Ok(())
}

#[tokio::test]
async fn select_expr() -> Result<()> {
// build plan using Table API
Expand Down
Loading