diff --git a/crates/functions/src/datetime/date_diff.rs b/crates/functions/src/datetime/date_diff.rs index 0df0489e..29f85b54 100644 --- a/crates/functions/src/datetime/date_diff.rs +++ b/crates/functions/src/datetime/date_diff.rs @@ -100,13 +100,6 @@ impl DateDiffFunc { let arr1 = cast(lhs, &DataType::Timestamp(TimeUnit::Nanosecond, None))?; let arr2 = cast(rhs, &DataType::Timestamp(TimeUnit::Nanosecond, None))?; - let diff = sub(&arr2, &arr1)?; - let diff_arr = diff - .as_any() - .downcast_ref::() - .context(dtime_errors::CantCastToSnafu { - v: "duration_nsec".to_string(), - })?; match unit_type { DatePart::Quarter | DatePart::Year | DatePart::YearISO => { let arr1 = &date_part(&arr1, unit_type)?; @@ -136,37 +129,25 @@ impl DateDiffFunc { let result = cast(&result, &DataType::Int64)?; Ok(ColumnarValue::Array(Arc::new(result))) } - DatePart::Week | DatePart::WeekISO => Ok(self.weeks_diff(diff_arr)), - DatePart::Day | DatePart::DayOfYear => Ok(Self::diff(diff_arr, 86_400 * SECOND)), - DatePart::Hour => { - let nanos_in_hour: i64 = 3_600 * SECOND; - let arr1 = &date_part(&arr1, unit_type)?; - let arr2 = &date_part(&arr2, unit_type)?; - let hours_diff = cast(&sub(&arr2, &arr1)?, &DataType::Int64)?; - let hours_arr = as_int64_array(&hours_diff)?; - - let result = diff_arr - .iter() - .zip(hours_arr.iter()) - .map(|(nanos, diff)| match (nanos, diff) { - (Some(n), Some(hours_diff)) => { - let res = n.div_euclid(nanos_in_hour); - if hours_diff != 0 { - Some(res + 1) - } else { - Some(res) - } - } - _ => None, - }) - .collect::(); - Ok(ColumnarValue::Array(Arc::new(result))) + DatePart::Week | DatePart::WeekISO => { + let diff = sub(&arr2, &arr1)?; + let diff_arr = diff + .as_any() + .downcast_ref::() + .context(dtime_errors::CantCastToSnafu { + v: "duration_nsec".to_string(), + })?; + Ok(self.weeks_diff(diff_arr)) + } + DatePart::Day | DatePart::DayOfYear => { + Self::boundary_diff(&arr1, &arr2, 86_400 * SECOND) } - DatePart::Minute => Ok(Self::diff(diff_arr, 60 * 
SECOND)), - DatePart::Second => Ok(Self::diff(diff_arr, SECOND)), - DatePart::Millisecond => Ok(Self::diff(diff_arr, 1_000_000)), - DatePart::Microsecond => Ok(Self::diff(diff_arr, 1_000)), - _ => Ok(Self::diff(diff_arr, 1)), + DatePart::Hour => Self::boundary_diff(&arr1, &arr2, 3_600 * SECOND), + DatePart::Minute => Self::boundary_diff(&arr1, &arr2, 60 * SECOND), + DatePart::Second => Self::boundary_diff(&arr1, &arr2, SECOND), + DatePart::Millisecond => Self::boundary_diff(&arr1, &arr2, 1_000_000), + DatePart::Microsecond => Self::boundary_diff(&arr1, &arr2, 1_000), + _ => Self::boundary_diff(&arr1, &arr2, 1), } } @@ -182,26 +163,15 @@ impl DateDiffFunc { | DatePart::Millisecond | DatePart::Microsecond | DatePart::Nanosecond => { - // Cast TIME to Int64 nanoseconds from midnight, compute diff - let lhs_i64 = cast(lhs, &DataType::Int64)?; - let rhs_i64 = cast(rhs, &DataType::Int64)?; - let diff_i64 = sub(&rhs_i64, &lhs_i64)?; - // Convert to Duration(Ns) to reuse the generic diff logic - let diff_ns = cast(&diff_i64, &DataType::Duration(TimeUnit::Nanosecond))?; - let diff_arr = diff_ns - .as_any() - .downcast_ref::() - .context(dtime_errors::CantCastToSnafu { - v: "duration_nsec".to_string(), - })?; - Ok(match unit_type { - DatePart::Hour => Self::diff(diff_arr, 3_600 * SECOND), - DatePart::Minute => Self::diff(diff_arr, 60 * SECOND), - DatePart::Second => Self::diff(diff_arr, SECOND), - DatePart::Millisecond => Self::diff(diff_arr, 1_000_000), - DatePart::Microsecond => Self::diff(diff_arr, 1_000), - _ => Self::diff(diff_arr, 1), - }) + let coef = match unit_type { + DatePart::Hour => 3_600 * SECOND, + DatePart::Minute => 60 * SECOND, + DatePart::Second => SECOND, + DatePart::Millisecond => 1_000_000, + DatePart::Microsecond => 1_000, + _ => 1, + }; + Self::boundary_diff(lhs, rhs, coef) } _ => dtime_errors::DateDiffInvalidComponentForTimeSnafu { component: format!("{unit_type:?}"), @@ -234,12 +204,28 @@ impl DateDiffFunc { ColumnarValue::Array(Arc::new(diff)) } - 
fn diff(diff_arr: &DurationNanosecondArray, coef: i64) -> ColumnarValue {
-        let diff_arr: Int64Array = diff_arr.unary(|x| {
-            let div = x / coef;
-            if x % coef == 0 { div } else { div + 1 }
-        });
-        ColumnarValue::Array(Arc::new(diff_arr))
+    /// Counts the `coef`-nanosecond boundaries crossed going from `lhs` to `rhs`
+    /// (Snowflake DATEDIFF semantics), not the rounded elapsed duration.
+    ///
+    /// Both inputs are cast to `Int64`; each value is floored to its bucket index
+    /// with `div_euclid(coef)` and the indices are subtracted (`rhs - lhs`).
+    /// A null on either side yields null. `coef` must be non-zero.
+    ///
+    /// # Errors
+    /// Fails if a cast or downcast fails, or if the bucket difference overflows
+    /// `i64`: the checked `sub` kernel reports that instead of panicking/wrapping.
+    fn boundary_diff(
+        lhs: &Arc<dyn Array>,
+        rhs: &Arc<dyn Array>,
+        coef: i64,
+    ) -> Result<ColumnarValue> {
+        let lhs_i64 = cast(lhs, &DataType::Int64)?;
+        let rhs_i64 = cast(rhs, &DataType::Int64)?;
+        let bucket = |v: i64| v.div_euclid(coef);
+        let lhs_buckets: Int64Array = as_int64_array(&lhs_i64)?.unary(bucket);
+        let rhs_buckets: Int64Array = as_int64_array(&rhs_i64)?.unary(bucket);
+        let result = sub(&rhs_buckets, &lhs_buckets)?;
+        Ok(ColumnarValue::Array(result))
     }
 }
diff --git a/crates/functions/src/tests/datetime/datediff.rs b/crates/functions/src/tests/datetime/datediff.rs
index 72b21138..7616c850 100644
--- a/crates/functions/src/tests/datetime/datediff.rs
+++ b/crates/functions/src/tests/datetime/datediff.rs
@@ -58,3 +58,64 @@ test_query!(
         CAST('1970-02-01 00:15:00' AS TIMESTAMP)) AS date_time",
     snapshot_path = "datediff"
 );
+
+// DATEDIFF uses boundary-count semantics (matches Snowflake), not
+// ceiling-of-duration. These cases all produce 0 because the endpoints
+// sit in the same `part` bucket even though the true duration is positive.
+test_query!( + boundary_count_same_bucket, + "SELECT + DATEDIFF('second', + TIMESTAMP '2020-01-01 00:00:00.100', + TIMESTAMP '2020-01-01 00:00:00.900') AS sec_sub, + DATEDIFF('minute', + TIMESTAMP '2020-01-01 00:00:05', + TIMESTAMP '2020-01-01 00:00:55') AS min_sub, + DATEDIFF('hour', + TIMESTAMP '2020-01-01 01:30:00', + TIMESTAMP '2020-01-01 01:50:00') AS hour_sub, + DATEDIFF('day', + TIMESTAMP '2020-01-01 08:00:00', + TIMESTAMP '2020-01-01 20:00:00') AS day_sub;", + snapshot_path = "datediff" +); + +// Endpoints straddle a single boundary: DATEDIFF returns 1 even when the +// true elapsed duration is less than one full unit. +test_query!( + boundary_count_straddle, + "SELECT + DATEDIFF('second', + TIMESTAMP '2020-01-01 00:00:00.900', + TIMESTAMP '2020-01-01 00:00:01.100') AS sec_straddle, + DATEDIFF('minute', + TIMESTAMP '2020-01-01 01:00:55', + TIMESTAMP '2020-01-01 01:01:05') AS min_straddle, + DATEDIFF('hour', + TIMESTAMP '2020-01-01 01:55:00', + TIMESTAMP '2020-01-01 02:05:00') AS hour_straddle, + DATEDIFF('day', + TIMESTAMP '2020-01-01 23:00:00', + TIMESTAMP '2020-01-02 01:00:00') AS day_straddle;", + snapshot_path = "datediff" +); + +// Counts boundaries, not rounded duration: a 1.5-second span that crosses +// exactly one second-boundary returns 1, not 2 (CEIL(1.5) = 2 would be wrong). 
+test_query!( + boundary_count_not_ceiling, + "SELECT + DATEDIFF('second', + TIMESTAMP '2020-01-01 00:00:00.250', + TIMESTAMP '2020-01-01 00:00:01.750') AS sec_1_5, + DATEDIFF('second', + TIMESTAMP '2020-01-01 00:00:00.500', + TIMESTAMP '2020-01-01 00:00:02.900') AS sec_2_4, + DATEDIFF('second', + TIMESTAMP '2020-01-01 00:00:00.000', + TIMESTAMP '2020-01-01 00:00:02.500') AS sec_2_5, + DATEDIFF('hour', + TIMESTAMP '2020-01-01 01:30:00', + TIMESTAMP '2020-01-01 02:30:00') AS hour_1h_two_buckets;", + snapshot_path = "datediff" +); diff --git a/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_not_ceiling.snap b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_not_ceiling.snap new file mode 100644 index 00000000..0b879da2 --- /dev/null +++ b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_not_ceiling.snap @@ -0,0 +1,14 @@ +--- +source: crates/functions/src/tests/datetime/datediff.rs +assertion_line: 105 +description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.250',\n TIMESTAMP '2020-01-01 00:00:01.750') AS sec_1_5,\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.500',\n TIMESTAMP '2020-01-01 00:00:02.900') AS sec_2_4,\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.000',\n TIMESTAMP '2020-01-01 00:00:02.500') AS sec_2_5,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:30:00',\n TIMESTAMP '2020-01-01 02:30:00') AS hour_1h_two_buckets;\"" +--- +Ok( + [ + "+---------+---------+---------+---------------------+", + "| sec_1_5 | sec_2_4 | sec_2_5 | hour_1h_two_buckets |", + "+---------+---------+---------+---------------------+", + "| 1 | 2 | 2 | 1 |", + "+---------+---------+---------+---------------------+", + ], +) diff --git a/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_same_bucket.snap b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_same_bucket.snap new file mode 100644 index 00000000..04f29a7e --- 
/dev/null +++ b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_same_bucket.snap @@ -0,0 +1,14 @@ +--- +source: crates/functions/src/tests/datetime/datediff.rs +assertion_line: 65 +description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.100',\n TIMESTAMP '2020-01-01 00:00:00.900') AS sec_sub,\n DATEDIFF('minute',\n TIMESTAMP '2020-01-01 00:00:05',\n TIMESTAMP '2020-01-01 00:00:55') AS min_sub,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:30:00',\n TIMESTAMP '2020-01-01 01:50:00') AS hour_sub,\n DATEDIFF('day',\n TIMESTAMP '2020-01-01 08:00:00',\n TIMESTAMP '2020-01-01 20:00:00') AS day_sub;\"" +--- +Ok( + [ + "+---------+---------+----------+---------+", + "| sec_sub | min_sub | hour_sub | day_sub |", + "+---------+---------+----------+---------+", + "| 0 | 0 | 0 | 0 |", + "+---------+---------+----------+---------+", + ], +) diff --git a/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_straddle.snap b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_straddle.snap new file mode 100644 index 00000000..9ae07bcf --- /dev/null +++ b/crates/functions/src/tests/datetime/snapshots/datediff/query_boundary_count_straddle.snap @@ -0,0 +1,14 @@ +--- +source: crates/functions/src/tests/datetime/datediff.rs +assertion_line: 85 +description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.900',\n TIMESTAMP '2020-01-01 00:00:01.100') AS sec_straddle,\n DATEDIFF('minute',\n TIMESTAMP '2020-01-01 01:00:55',\n TIMESTAMP '2020-01-01 01:01:05') AS min_straddle,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:55:00',\n TIMESTAMP '2020-01-01 02:05:00') AS hour_straddle,\n DATEDIFF('day',\n TIMESTAMP '2020-01-01 23:00:00',\n TIMESTAMP '2020-01-02 01:00:00') AS day_straddle;\"" +--- +Ok( + [ + "+--------------+--------------+---------------+--------------+", + "| sec_straddle | min_straddle | hour_straddle | day_straddle |", + 
"+--------------+--------------+---------------+--------------+", + "| 1 | 1 | 1 | 1 |", + "+--------------+--------------+---------------+--------------+", + ], +) diff --git a/crates/functions/src/tests/datetime/snapshots/datediff/query_different_types.snap b/crates/functions/src/tests/datetime/snapshots/datediff/query_different_types.snap index 95042626..dbbe8e85 100644 --- a/crates/functions/src/tests/datetime/snapshots/datediff/query_different_types.snap +++ b/crates/functions/src/tests/datetime/snapshots/datediff/query_different_types.snap @@ -7,7 +7,7 @@ Ok( "+---------+---------+---------+---------+-----------+", "| ts_date | date_ts | ts_time | time_ts | date_time |", "+---------+---------+---------+---------+-----------+", - "| 6 | 7 | 5 | 5 | 44655 |", + "| 6 | 6 | 5 | 5 | 44655 |", "+---------+---------+---------+---------+-----------+", ], )