Skip to content

Commit 9a38e5a

Browse files
Fix search_after validation. (#4347)
* Fix search_after validation. * Update quickwit/quickwit-search/src/root.rs Co-authored-by: trinity-1686a <[email protected]> --------- Co-authored-by: trinity-1686a <[email protected]>
1 parent 8054eab commit 9a38e5a

File tree

1 file changed

+110
-2
lines changed
  • quickwit/quickwit-search/src

1 file changed

+110
-2
lines changed

quickwit/quickwit-search/src/root.rs

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ use crate::{
6464
/// Maximum accepted scroll TTL.
6565
const MAX_SCROLL_TTL: Duration = Duration::from_secs(DELETION_GRACE_PERIOD.as_secs() - 60 * 2);
6666

67+
/// Names of the doc-based sort fields. They are not looked up in the schema, and a
/// `search_after` does not carry a sort value for them (it must define a split ID instead).
const SORT_DOC_FIELD_NAMES: &[&str] = &["_shard_doc", "_doc"];
68+
6769
/// SearchJob to be assigned to search clients by the [`SearchJobPlacer`].
6870
#[derive(Debug, Clone, PartialEq)]
6971
pub struct SearchJob {
@@ -371,6 +373,20 @@ fn validate_sort_by_fields_and_search_after(
371373
let Some(search_after_partial_hit) = search_after.as_ref() else {
372374
return Ok(());
373375
};
376+
377+
let sort_fields_without_doc_count = sort_fields
378+
.iter()
379+
.filter(|sort_field| !SORT_DOC_FIELD_NAMES.contains(&sort_field.field_name.as_str()))
380+
.count();
381+
let has_doc_sort_field = sort_fields_without_doc_count != sort_fields.len();
382+
if has_doc_sort_field && search_after_partial_hit.split_id.is_empty() {
383+
return Err(SearchError::InvalidArgument(
384+
"search_after with a sort field `_doc` must define a split ID, segment ID and doc ID \
385+
values"
386+
.to_string(),
387+
));
388+
}
389+
374390
let mut search_after_sort_value_count = 0;
375391
// TODO: we could validate if the search after sort value types are consistent with the sort
376392
// field types.
@@ -384,7 +400,7 @@ fn validate_sort_by_fields_and_search_after(
384400
.context("sort value must be set")?;
385401
search_after_sort_value_count += 1;
386402
}
387-
if search_after_sort_value_count != sort_fields.len() {
403+
if search_after_sort_value_count != sort_fields_without_doc_count {
388404
return Err(SearchError::InvalidArgument(format!(
389405
"`search_after` must have the same number of sort values as sort by fields {:?}",
390406
sort_fields
@@ -400,7 +416,7 @@ fn get_sort_by_field_entry<'a>(
400416
field_name: &str,
401417
schema: &'a Schema,
402418
) -> crate::Result<Option<&'a FieldEntry>> {
403-
if ["_score", "_shard_doc", "_doc"].contains(&field_name) {
419+
if "_score" == field_name || SORT_DOC_FIELD_NAMES.contains(&field_name) {
404420
return Ok(None);
405421
}
406422
let dynamic_field_opt = schema.get_field(DYNAMIC_FIELD_NAME).ok();
@@ -1880,6 +1896,32 @@ mod tests {
18801896
sort_value2: Some(SortByValue {
18811897
sort_value: Some(SortValue::U64(2)),
18821898
}),
1899+
split_id: "".to_string(),
1900+
segment_ord: 0,
1901+
doc_id: 0,
1902+
};
1903+
validate_sort_by_fields_and_search_after(&sort_fields, &Some(partial_hit)).unwrap();
1904+
}
1905+
1906+
#[test]
1907+
fn test_validate_sort_by_fields_and_search_after_ok_with_doc_sort_field() {
1908+
let sort_fields = vec![
1909+
SortField {
1910+
field_name: "timestamp".to_string(),
1911+
sort_order: 0,
1912+
sort_datetime_format: Some(SortDatetimeFormat::UnixTimestampMillis as i32),
1913+
},
1914+
SortField {
1915+
field_name: "_doc".to_string(),
1916+
sort_order: 0,
1917+
sort_datetime_format: None,
1918+
},
1919+
];
1920+
let partial_hit = PartialHit {
1921+
sort_value: Some(SortByValue {
1922+
sort_value: Some(SortValue::U64(1)),
1923+
}),
1924+
sort_value2: None,
18831925
split_id: "split1".to_string(),
18841926
segment_ord: 1,
18851927
doc_id: 1,
@@ -1962,6 +2004,72 @@ mod tests {
19622004
);
19632005
}
19642006

2007+
#[test]
2008+
fn test_validate_sort_by_fields_and_search_after_invalid_with_missing_split_id() {
2009+
// A `_doc` sort field + search after with an empty split ID is invalid.
2010+
let sort_fields = vec![
2011+
SortField {
2012+
field_name: "timestamp".to_string(),
2013+
sort_order: 0,
2014+
sort_datetime_format: Some(SortDatetimeFormat::UnixTimestampMillis as i32),
2015+
},
2016+
SortField {
2017+
field_name: "_doc".to_string(),
2018+
sort_order: 0,
2019+
sort_datetime_format: None,
2020+
},
2021+
];
2022+
let partial_hit = PartialHit {
2023+
sort_value: Some(SortByValue {
2024+
sort_value: Some(SortValue::U64(1)),
2025+
}),
2026+
sort_value2: None,
2027+
split_id: "".to_string(),
2028+
segment_ord: 1,
2029+
doc_id: 1,
2030+
};
2031+
let error =
2032+
validate_sort_by_fields_and_search_after(&sort_fields, &Some(partial_hit)).unwrap_err();
2033+
assert_eq!(
2034+
error.to_string(),
2035+
"Invalid argument: search_after with a sort field `_doc` must define a split ID, \
2036+
segment ID and doc ID values"
2037+
);
2038+
}
2039+
2040+
#[test]
2041+
fn test_validate_sort_by_fields_and_search_valid_1() {
2042+
// 2 sort fields + search after with only one sort value is invalid.
2043+
let sort_fields = vec![
2044+
SortField {
2045+
field_name: "timestamp".to_string(),
2046+
sort_order: 0,
2047+
sort_datetime_format: Some(SortDatetimeFormat::UnixTimestampMillis as i32),
2048+
},
2049+
SortField {
2050+
field_name: "id".to_string(),
2051+
sort_order: 0,
2052+
sort_datetime_format: None,
2053+
},
2054+
];
2055+
let partial_hit = PartialHit {
2056+
sort_value: Some(SortByValue {
2057+
sort_value: Some(SortValue::U64(1)),
2058+
}),
2059+
sort_value2: None,
2060+
split_id: "split1".to_string(),
2061+
segment_ord: 1,
2062+
doc_id: 1,
2063+
};
2064+
let error =
2065+
validate_sort_by_fields_and_search_after(&sort_fields, &Some(partial_hit)).unwrap_err();
2066+
assert_eq!(
2067+
error.to_string(),
2068+
"Invalid argument: `search_after` must have the same number of sort values as sort by \
2069+
fields [\"timestamp\", \"id\"]"
2070+
);
2071+
}
2072+
19652073
#[test]
19662074
fn test_validate_sort_by_field_type_invalid() {
19672075
// sort non-datetime field with a datetime format is invalid.

0 commit comments

Comments
 (0)