@@ -64,6 +64,8 @@ use crate::{
6464/// Maximum accepted scroll TTL: `DELETION_GRACE_PERIOD` minus two minutes (`60 * 2` seconds).
6565const MAX_SCROLL_TTL : Duration = Duration :: from_secs ( DELETION_GRACE_PERIOD . as_secs ( ) - 60 * 2 ) ;
6666
/// Names of the document-order sort fields (`_shard_doc` and `_doc`).
///
/// These names are excluded from the number of sort fields that must have a matching
/// `search_after` sort value; when one of them is present, `search_after` must carry a
/// non-empty split ID instead. `get_sort_by_field_entry` returns `Ok(None)` for them.
67+ const SORT_DOC_FIELD_NAMES : & [ & str ] = & [ "_shard_doc" , "_doc" ] ;
68+
6769/// SearchJob to be assigned to search clients by the [`SearchJobPlacer`].
6870#[ derive( Debug , Clone , PartialEq ) ]
6971pub struct SearchJob {
@@ -371,6 +373,20 @@ fn validate_sort_by_fields_and_search_after(
371373 let Some ( search_after_partial_hit) = search_after. as_ref ( ) else {
372374 return Ok ( ( ) ) ;
373375 } ;
376+
377+ let sort_fields_without_doc_count = sort_fields
378+ . iter ( )
379+ . filter ( |sort_field| !SORT_DOC_FIELD_NAMES . contains ( & sort_field. field_name . as_str ( ) ) )
380+ . count ( ) ;
381+ let has_doc_sort_field = sort_fields_without_doc_count != sort_fields. len ( ) ;
382+ if has_doc_sort_field && search_after_partial_hit. split_id . is_empty ( ) {
383+ return Err ( SearchError :: InvalidArgument (
384+ "search_after with a sort field `_doc` must define a split ID, segment ID and doc ID \
385+ values"
386+ . to_string ( ) ,
387+ ) ) ;
388+ }
389+
374390 let mut search_after_sort_value_count = 0 ;
375391 // TODO: we could validate if the search after sort value types of consistent with the sort
376392 // field types.
@@ -384,7 +400,7 @@ fn validate_sort_by_fields_and_search_after(
384400 . context ( "sort value must be set" ) ?;
385401 search_after_sort_value_count += 1 ;
386402 }
387- if search_after_sort_value_count != sort_fields . len ( ) {
403+ if search_after_sort_value_count != sort_fields_without_doc_count {
388404 return Err ( SearchError :: InvalidArgument ( format ! (
389405 "`search_after` must have the same number of sort values as sort by fields {:?}" ,
390406 sort_fields
@@ -400,7 +416,7 @@ fn get_sort_by_field_entry<'a>(
400416 field_name : & str ,
401417 schema : & ' a Schema ,
402418) -> crate :: Result < Option < & ' a FieldEntry > > {
403- if [ "_score" , "_shard_doc" , "_doc" ] . contains ( & field_name) {
419+ if "_score" == field_name || SORT_DOC_FIELD_NAMES . contains ( & field_name) {
404420 return Ok ( None ) ;
405421 }
406422 let dynamic_field_opt = schema. get_field ( DYNAMIC_FIELD_NAME ) . ok ( ) ;
@@ -1880,6 +1896,32 @@ mod tests {
18801896 sort_value2 : Some ( SortByValue {
18811897 sort_value : Some ( SortValue :: U64 ( 2 ) ) ,
18821898 } ) ,
1899+ split_id : "" . to_string ( ) ,
1900+ segment_ord : 0 ,
1901+ doc_id : 0 ,
1902+ } ;
1903+ validate_sort_by_fields_and_search_after ( & sort_fields, & Some ( partial_hit) ) . unwrap ( ) ;
1904+ }
1905+
1906+ #[ test]
1907+ fn test_validate_sort_by_fields_and_search_after_ok_with_doc_sort_field ( ) {
1908+ let sort_fields = vec ! [
1909+ SortField {
1910+ field_name: "timestamp" . to_string( ) ,
1911+ sort_order: 0 ,
1912+ sort_datetime_format: Some ( SortDatetimeFormat :: UnixTimestampMillis as i32 ) ,
1913+ } ,
1914+ SortField {
1915+ field_name: "_doc" . to_string( ) ,
1916+ sort_order: 0 ,
1917+ sort_datetime_format: None ,
1918+ } ,
1919+ ] ;
1920+ let partial_hit = PartialHit {
1921+ sort_value : Some ( SortByValue {
1922+ sort_value : Some ( SortValue :: U64 ( 1 ) ) ,
1923+ } ) ,
1924+ sort_value2 : None ,
18831925 split_id : "split1" . to_string ( ) ,
18841926 segment_ord : 1 ,
18851927 doc_id : 1 ,
@@ -1962,6 +2004,72 @@ mod tests {
19622004 ) ;
19632005 }
19642006
2007+ #[ test]
2008+ fn test_validate_sort_by_fields_and_search_after_invalid_with_missing_split_id ( ) {
2009+ // A `_doc` sort field + search after with an empty split ID is invalid.
2010+ let sort_fields = vec ! [
2011+ SortField {
2012+ field_name: "timestamp" . to_string( ) ,
2013+ sort_order: 0 ,
2014+ sort_datetime_format: Some ( SortDatetimeFormat :: UnixTimestampMillis as i32 ) ,
2015+ } ,
2016+ SortField {
2017+ field_name: "_doc" . to_string( ) ,
2018+ sort_order: 0 ,
2019+ sort_datetime_format: None ,
2020+ } ,
2021+ ] ;
2022+ let partial_hit = PartialHit {
2023+ sort_value : Some ( SortByValue {
2024+ sort_value : Some ( SortValue :: U64 ( 1 ) ) ,
2025+ } ) ,
2026+ sort_value2 : None ,
2027+ split_id : "" . to_string ( ) ,
2028+ segment_ord : 1 ,
2029+ doc_id : 1 ,
2030+ } ;
2031+ let error =
2032+ validate_sort_by_fields_and_search_after ( & sort_fields, & Some ( partial_hit) ) . unwrap_err ( ) ;
2033+ assert_eq ! (
2034+ error. to_string( ) ,
2035+ "Invalid argument: search_after with a sort field `_doc` must define a split ID, \
2036+ segment ID and doc ID values"
2037+ ) ;
2038+ }
2039+
2040+ #[ test]
2041+ fn test_validate_sort_by_fields_and_search_valid_1 ( ) {
2042+ // 2 non-doc sort fields + search after with only one sort value is invalid. NOTE(review): despite `valid_1` in the test name, this test asserts an error.
2043+ let sort_fields = vec ! [
2044+ SortField {
2045+ field_name: "timestamp" . to_string( ) ,
2046+ sort_order: 0 ,
2047+ sort_datetime_format: Some ( SortDatetimeFormat :: UnixTimestampMillis as i32 ) ,
2048+ } ,
2049+ SortField {
2050+ field_name: "id" . to_string( ) ,
2051+ sort_order: 0 ,
2052+ sort_datetime_format: None ,
2053+ } ,
2054+ ] ;
2055+ let partial_hit = PartialHit {
2056+ sort_value : Some ( SortByValue {
2057+ sort_value : Some ( SortValue :: U64 ( 1 ) ) ,
2058+ } ) ,
2059+ sort_value2 : None ,
2060+ split_id : "split1" . to_string ( ) ,
2061+ segment_ord : 1 ,
2062+ doc_id : 1 ,
2063+ } ;
2064+ let error =
2065+ validate_sort_by_fields_and_search_after ( & sort_fields, & Some ( partial_hit) ) . unwrap_err ( ) ;
2066+ assert_eq ! (
2067+ error. to_string( ) ,
2068+ "Invalid argument: `search_after` must have the same number of sort values as sort by \
2069+ fields [\" timestamp\" , \" id\" ]"
2070+ ) ;
2071+ }
2072+
19652073 #[ test]
19662074 fn test_validate_sort_by_field_type_invalid ( ) {
19672075 // sort non-datetime field with a datetime format is invalid.
0 commit comments