Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 49 additions & 63 deletions crates/functions/src/datetime/date_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,6 @@ impl DateDiffFunc {

let arr1 = cast(lhs, &DataType::Timestamp(TimeUnit::Nanosecond, None))?;
let arr2 = cast(rhs, &DataType::Timestamp(TimeUnit::Nanosecond, None))?;
let diff = sub(&arr2, &arr1)?;
let diff_arr = diff
.as_any()
.downcast_ref::<DurationNanosecondArray>()
.context(dtime_errors::CantCastToSnafu {
v: "duration_nsec".to_string(),
})?;
match unit_type {
DatePart::Quarter | DatePart::Year | DatePart::YearISO => {
let arr1 = &date_part(&arr1, unit_type)?;
Expand Down Expand Up @@ -136,37 +129,25 @@ impl DateDiffFunc {
let result = cast(&result, &DataType::Int64)?;
Ok(ColumnarValue::Array(Arc::new(result)))
}
DatePart::Week | DatePart::WeekISO => Ok(self.weeks_diff(diff_arr)),
DatePart::Day | DatePart::DayOfYear => Ok(Self::diff(diff_arr, 86_400 * SECOND)),
DatePart::Hour => {
let nanos_in_hour: i64 = 3_600 * SECOND;
let arr1 = &date_part(&arr1, unit_type)?;
let arr2 = &date_part(&arr2, unit_type)?;
let hours_diff = cast(&sub(&arr2, &arr1)?, &DataType::Int64)?;
let hours_arr = as_int64_array(&hours_diff)?;

let result = diff_arr
.iter()
.zip(hours_arr.iter())
.map(|(nanos, diff)| match (nanos, diff) {
(Some(n), Some(hours_diff)) => {
let res = n.div_euclid(nanos_in_hour);
if hours_diff != 0 {
Some(res + 1)
} else {
Some(res)
}
}
_ => None,
})
.collect::<Int64Array>();
Ok(ColumnarValue::Array(Arc::new(result)))
DatePart::Week | DatePart::WeekISO => {
let diff = sub(&arr2, &arr1)?;
let diff_arr = diff
.as_any()
.downcast_ref::<DurationNanosecondArray>()
.context(dtime_errors::CantCastToSnafu {
v: "duration_nsec".to_string(),
})?;
Ok(self.weeks_diff(diff_arr))
}
DatePart::Day | DatePart::DayOfYear => {
Self::boundary_diff(&arr1, &arr2, 86_400 * SECOND)
}
DatePart::Minute => Ok(Self::diff(diff_arr, 60 * SECOND)),
DatePart::Second => Ok(Self::diff(diff_arr, SECOND)),
DatePart::Millisecond => Ok(Self::diff(diff_arr, 1_000_000)),
DatePart::Microsecond => Ok(Self::diff(diff_arr, 1_000)),
_ => Ok(Self::diff(diff_arr, 1)),
DatePart::Hour => Self::boundary_diff(&arr1, &arr2, 3_600 * SECOND),
DatePart::Minute => Self::boundary_diff(&arr1, &arr2, 60 * SECOND),
DatePart::Second => Self::boundary_diff(&arr1, &arr2, SECOND),
DatePart::Millisecond => Self::boundary_diff(&arr1, &arr2, 1_000_000),
DatePart::Microsecond => Self::boundary_diff(&arr1, &arr2, 1_000),
_ => Self::boundary_diff(&arr1, &arr2, 1),
}
}

Expand All @@ -182,26 +163,15 @@ impl DateDiffFunc {
| DatePart::Millisecond
| DatePart::Microsecond
| DatePart::Nanosecond => {
// Cast TIME to Int64 nanoseconds from midnight, compute diff
let lhs_i64 = cast(lhs, &DataType::Int64)?;
let rhs_i64 = cast(rhs, &DataType::Int64)?;
let diff_i64 = sub(&rhs_i64, &lhs_i64)?;
// Convert to Duration(Ns) to reuse the generic diff logic
let diff_ns = cast(&diff_i64, &DataType::Duration(TimeUnit::Nanosecond))?;
let diff_arr = diff_ns
.as_any()
.downcast_ref::<DurationNanosecondArray>()
.context(dtime_errors::CantCastToSnafu {
v: "duration_nsec".to_string(),
})?;
Ok(match unit_type {
DatePart::Hour => Self::diff(diff_arr, 3_600 * SECOND),
DatePart::Minute => Self::diff(diff_arr, 60 * SECOND),
DatePart::Second => Self::diff(diff_arr, SECOND),
DatePart::Millisecond => Self::diff(diff_arr, 1_000_000),
DatePart::Microsecond => Self::diff(diff_arr, 1_000),
_ => Self::diff(diff_arr, 1),
})
let coef = match unit_type {
DatePart::Hour => 3_600 * SECOND,
DatePart::Minute => 60 * SECOND,
DatePart::Second => SECOND,
DatePart::Millisecond => 1_000_000,
DatePart::Microsecond => 1_000,
_ => 1,
};
Self::boundary_diff(lhs, rhs, coef)
}
_ => dtime_errors::DateDiffInvalidComponentForTimeSnafu {
component: format!("{unit_type:?}"),
Expand Down Expand Up @@ -234,12 +204,28 @@ impl DateDiffFunc {
ColumnarValue::Array(Arc::new(diff))
}

fn diff(diff_arr: &DurationNanosecondArray, coef: i64) -> ColumnarValue {
let diff_arr: Int64Array = diff_arr.unary(|x| {
let div = x / coef;
if x % coef == 0 { div } else { div + 1 }
});
ColumnarValue::Array(Arc::new(diff_arr))
// Snowflake's DATEDIFF returns the number of `part`-boundaries crossed
// between the two endpoints, not the fractional elapsed duration. We
// implement that by truncating each endpoint to `coef` precision
// independently (floor division) and subtracting the integer quotients.
fn boundary_diff(
lhs: &Arc<dyn Array>,
rhs: &Arc<dyn Array>,
coef: i64,
) -> Result<ColumnarValue> {
let lhs_i64 = cast(lhs, &DataType::Int64)?;
let rhs_i64 = cast(rhs, &DataType::Int64)?;
let a = as_int64_array(&lhs_i64)?;
let b = as_int64_array(&rhs_i64)?;
let result: Int64Array = a
.iter()
.zip(b.iter())
.map(|(a, b)| match (a, b) {
(Some(a), Some(b)) => Some(b.div_euclid(coef) - a.div_euclid(coef)),
_ => None,
})
.collect();
Ok(ColumnarValue::Array(Arc::new(result)))
}
}

Expand Down
61 changes: 61 additions & 0 deletions crates/functions/src/tests/datetime/datediff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,64 @@ test_query!(
CAST('1970-02-01 00:15:00' AS TIMESTAMP)) AS date_time",
snapshot_path = "datediff"
);

// DATEDIFF uses boundary-count semantics (matches Snowflake), not
// ceiling-of-duration. Every pair below keeps both endpoints inside the
// same `part` bucket (same second, minute, hour and day respectively), so
// each column is expected to be 0 even though the elapsed duration is
// positive (800 ms, 50 s, 20 min and 12 h).
test_query!(
boundary_count_same_bucket,
"SELECT
DATEDIFF('second',
TIMESTAMP '2020-01-01 00:00:00.100',
TIMESTAMP '2020-01-01 00:00:00.900') AS sec_sub,
DATEDIFF('minute',
TIMESTAMP '2020-01-01 00:00:05',
TIMESTAMP '2020-01-01 00:00:55') AS min_sub,
DATEDIFF('hour',
TIMESTAMP '2020-01-01 01:30:00',
TIMESTAMP '2020-01-01 01:50:00') AS hour_sub,
DATEDIFF('day',
TIMESTAMP '2020-01-01 08:00:00',
TIMESTAMP '2020-01-01 20:00:00') AS day_sub;",
snapshot_path = "datediff"
);

// Endpoints straddle a single boundary: DATEDIFF returns 1 even when the
// true elapsed duration is less than one full unit. The spans here are
// 200 ms across a second boundary, 10 s across a minute boundary, 10 min
// across an hour boundary and 2 h across midnight; each column is expected
// to be 1.
test_query!(
boundary_count_straddle,
"SELECT
DATEDIFF('second',
TIMESTAMP '2020-01-01 00:00:00.900',
TIMESTAMP '2020-01-01 00:00:01.100') AS sec_straddle,
DATEDIFF('minute',
TIMESTAMP '2020-01-01 01:00:55',
TIMESTAMP '2020-01-01 01:01:05') AS min_straddle,
DATEDIFF('hour',
TIMESTAMP '2020-01-01 01:55:00',
TIMESTAMP '2020-01-01 02:05:00') AS hour_straddle,
DATEDIFF('day',
TIMESTAMP '2020-01-01 23:00:00',
TIMESTAMP '2020-01-02 01:00:00') AS day_straddle;",
snapshot_path = "datediff"
);

// Counts boundaries, not rounded duration: a 1.5-second span that crosses
// exactly one second-boundary returns 1, not 2 (CEIL(1.5) = 2 would be wrong).
// Expected values, with the ceiling-of-duration answer in parentheses:
//   sec_1_5             1.5 s span, one boundary crossed   -> 1 (not 2)
//   sec_2_4             2.4 s span, two boundaries crossed -> 2 (not 3)
//   sec_2_5             2.5 s span, two boundaries crossed -> 2 (not 3)
//   hour_1h_two_buckets exactly 1 h, adjacent hour buckets -> 1
test_query!(
boundary_count_not_ceiling,
"SELECT
DATEDIFF('second',
TIMESTAMP '2020-01-01 00:00:00.250',
TIMESTAMP '2020-01-01 00:00:01.750') AS sec_1_5,
DATEDIFF('second',
TIMESTAMP '2020-01-01 00:00:00.500',
TIMESTAMP '2020-01-01 00:00:02.900') AS sec_2_4,
DATEDIFF('second',
TIMESTAMP '2020-01-01 00:00:00.000',
TIMESTAMP '2020-01-01 00:00:02.500') AS sec_2_5,
DATEDIFF('hour',
TIMESTAMP '2020-01-01 01:30:00',
TIMESTAMP '2020-01-01 02:30:00') AS hour_1h_two_buckets;",
snapshot_path = "datediff"
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/functions/src/tests/datetime/datediff.rs
assertion_line: 105
description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.250',\n TIMESTAMP '2020-01-01 00:00:01.750') AS sec_1_5,\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.500',\n TIMESTAMP '2020-01-01 00:00:02.900') AS sec_2_4,\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.000',\n TIMESTAMP '2020-01-01 00:00:02.500') AS sec_2_5,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:30:00',\n TIMESTAMP '2020-01-01 02:30:00') AS hour_1h_two_buckets;\""
---
Ok(
[
"+---------+---------+---------+---------------------+",
"| sec_1_5 | sec_2_4 | sec_2_5 | hour_1h_two_buckets |",
"+---------+---------+---------+---------------------+",
"| 1 | 2 | 2 | 1 |",
"+---------+---------+---------+---------------------+",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/functions/src/tests/datetime/datediff.rs
assertion_line: 65
description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.100',\n TIMESTAMP '2020-01-01 00:00:00.900') AS sec_sub,\n DATEDIFF('minute',\n TIMESTAMP '2020-01-01 00:00:05',\n TIMESTAMP '2020-01-01 00:00:55') AS min_sub,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:30:00',\n TIMESTAMP '2020-01-01 01:50:00') AS hour_sub,\n DATEDIFF('day',\n TIMESTAMP '2020-01-01 08:00:00',\n TIMESTAMP '2020-01-01 20:00:00') AS day_sub;\""
---
Ok(
[
"+---------+---------+----------+---------+",
"| sec_sub | min_sub | hour_sub | day_sub |",
"+---------+---------+----------+---------+",
"| 0 | 0 | 0 | 0 |",
"+---------+---------+----------+---------+",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/functions/src/tests/datetime/datediff.rs
assertion_line: 85
description: "\"SELECT\n DATEDIFF('second',\n TIMESTAMP '2020-01-01 00:00:00.900',\n TIMESTAMP '2020-01-01 00:00:01.100') AS sec_straddle,\n DATEDIFF('minute',\n TIMESTAMP '2020-01-01 01:00:55',\n TIMESTAMP '2020-01-01 01:01:05') AS min_straddle,\n DATEDIFF('hour',\n TIMESTAMP '2020-01-01 01:55:00',\n TIMESTAMP '2020-01-01 02:05:00') AS hour_straddle,\n DATEDIFF('day',\n TIMESTAMP '2020-01-01 23:00:00',\n TIMESTAMP '2020-01-02 01:00:00') AS day_straddle;\""
---
Ok(
[
"+--------------+--------------+---------------+--------------+",
"| sec_straddle | min_straddle | hour_straddle | day_straddle |",
"+--------------+--------------+---------------+--------------+",
"| 1 | 1 | 1 | 1 |",
"+--------------+--------------+---------------+--------------+",
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Ok(
"+---------+---------+---------+---------+-----------+",
"| ts_date | date_ts | ts_time | time_ts | date_time |",
"+---------+---------+---------+---------+-----------+",
"| 6 | 7 | 5 | 5 | 44655 |",
"| 6 | 6 | 5 | 5 | 44655 |",
"+---------+---------+---------+---------+-----------+",
],
)