diff --git a/Cargo.lock b/Cargo.lock index 1d87b2f69..10bf0e1ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -916,6 +916,7 @@ dependencies = [ "tinystr", "tzif", "yoke", + "zerofrom", "zerotrie", "zerovec", "zoneinfo64", diff --git a/Cargo.toml b/Cargo.toml index e1c16ed53..073653d54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,7 +97,7 @@ web-time = { workspace = true, optional = true } iana-time-zone = { workspace = true, optional = true } [dev-dependencies] -timezone_provider = { workspace = true, features = ["zoneinfo64"] } +timezone_provider = { workspace = true, features = ["zoneinfo64", "experimental_tzif"] } zoneinfo64 = { workspace = true } resb = "0.1.0" diff --git a/provider/Cargo.toml b/provider/Cargo.toml index f0274f148..f0bb50165 100644 --- a/provider/Cargo.toml +++ b/provider/Cargo.toml @@ -37,7 +37,7 @@ datagen = [ ] std = [] # Experimental tzif/tzdb compiled data -experimental_tzif = [] +experimental_tzif = ["dep:zerofrom", "zerofrom/derive"] # Performing timezone resolution with the `tzif` crate tzif = ["dep:tzif", @@ -63,6 +63,7 @@ zoneinfo_rs = { workspace = true, features = ["std"], optional = true } tzif = { workspace = true, optional = true } jiff-tzdb = { workspace = true, optional = true } combine = { workspace = true, optional = true } +zerofrom = { version = "0.1.6", optional = true } # zoneinfo64 dependency zoneinfo64 = { workspace = true, optional = true } diff --git a/provider/README.md b/provider/README.md index e1d849f4a..481ca265c 100644 --- a/provider/README.md +++ b/provider/README.md @@ -22,11 +22,11 @@ Below is a list of currently available time zone providers. 
- `ZoneInfo64TzdbProvider`: a provider using ICU4C's zoneinfo64 resource bundle (enable with `zoneinfo64` features flag) - `FsTzdbProvider`: a provider that reads and parses tzdata at runtime from the host file system's -TZif files (enable with `tzif` feature flag) -- `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZif's compiled -into the application (enable with `tzif` feature flag) - -Coming soon (hopefully), a zero copy compiled tzdb provider (see `experimental_tzif` for more). + TZif files (enable with `tzif` feature flag) +- `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZifs compiled + into the application (enable with `tzif` feature flag) +- `ZeroCompiledTzdbProvider`: a provider that deserializes time zone data from TZifs compiled + into the application (enable with `experimental_tzif` feature flag) ### Time zone provider traits diff --git a/provider/src/common.rs b/provider/src/common.rs new file mode 100644 index 000000000..f28042d3a --- /dev/null +++ b/provider/src/common.rs @@ -0,0 +1,323 @@ +//! Common logic shared across TZif providers + +use crate::{ + provider::{GapEntryOffsets, UtcOffsetSeconds}, + utils, +}; + +use core::ops::Range; + +#[cfg(feature = "tzif")] +use tzif::data::{ + posix::{DstTransitionInfo, PosixTzString, TransitionDate, TransitionDay}, + tzif::LocalTimeTypeRecord, +}; + +// TODO: Workshop record name? +/// The `LocalTimeRecord` result represents the result of searching for a +/// time zone transition without the offset seconds applied to the +/// epoch seconds. +/// +/// As a result of the search, it is possible for the resulting search to be either +/// Empty (due to an invalid time being provided that would be in the +1 tz shift) +/// or two time zones (when a time exists in the ambiguous range of a -1 shift). 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LocalTimeRecordResult { + Empty(GapEntryOffsets), + Single(UtcOffsetSeconds), + Ambiguous { + first: UtcOffsetSeconds, + second: UtcOffsetSeconds, + }, +} + +/// `TimeZoneTransitionInfo` represents information about a timezone transition. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TimeZoneTransitionInfo { + /// The transition time epoch at which the offset needs to be applied. + pub transition_epoch: Option, + /// The time zone offset in seconds. + pub offset: UtcOffsetSeconds, +} + +impl From for LocalTimeRecordResult { + fn from(value: UtcOffsetSeconds) -> Self { + Self::Single(value) + } +} + +#[cfg(feature = "tzif")] +impl From for LocalTimeRecordResult { + fn from(value: LocalTimeTypeRecord) -> Self { + Self::Single(value.into()) + } +} + +#[cfg(feature = "tzif")] +impl From<(LocalTimeTypeRecord, LocalTimeTypeRecord)> for LocalTimeRecordResult { + fn from(value: (LocalTimeTypeRecord, LocalTimeTypeRecord)) -> Self { + Self::Ambiguous { + first: value.0.into(), + second: value.1.into(), + } + } +} + +/// Stores the information about DST transitions for a given year +pub(crate) struct DstTransitionInfoForYear { + pub(crate) dst_start_seconds: i64, + pub(crate) dst_end_seconds: i64, + pub(crate) std_offset: UtcOffsetSeconds, + pub(crate) dst_offset: UtcOffsetSeconds, +} + +impl DstTransitionInfoForYear { + #[cfg(feature = "tzif")] + pub(crate) fn compute( + posix_tz_string: &PosixTzString, + dst_variant: &DstTransitionInfo, + year: i32, + ) -> Self { + let std_offset = UtcOffsetSeconds::from(&posix_tz_string.std_info); + let dst_offset = UtcOffsetSeconds::from(&dst_variant.variant_info); + let dst_start_seconds = + calculate_transition_seconds_for_year(year, dst_variant.start_date, std_offset); + let dst_end_seconds = + calculate_transition_seconds_for_year(year, dst_variant.end_date, dst_offset); + Self { + dst_start_seconds, + dst_end_seconds, + std_offset, + dst_offset, + } + } + + // 
Returns the range between offsets in this year + // This may cover DST or standard time, whichever starts first + pub(crate) fn transition_range(&self) -> Range { + if self.dst_start_seconds > self.dst_end_seconds { + self.dst_end_seconds..self.dst_start_seconds + } else { + self.dst_start_seconds..self.dst_end_seconds + } + } +} + +#[cfg(feature = "tzif")] +pub(crate) fn calculate_transition_seconds_for_year( + year: i32, + transition_date: TransitionDate, + offset: UtcOffsetSeconds, +) -> i64 { + // Determine the year of the requested time. + let year_epoch_seconds = i64::from(utils::epoch_days_for_year(year)) * 86400; + let is_leap = utils::is_leap(year); + + // Calculate the days in the year for the TransitionDate + // This value is zero-indexed so it can be added to the year's epoch seconds + let days = match transition_date.day { + TransitionDay::NoLeap(day) if day > 59 => day - 1 + is_leap as u16, + TransitionDay::NoLeap(day) => day - 1, + TransitionDay::WithLeap(day) => day, + TransitionDay::Mwd(month, week, day) => { + let days_to_month = utils::month_to_day((month - 1) as u8, is_leap); + let days_in_month = u16::from(utils::iso_days_in_month(year, month as u8)); + + // Month starts in the day... + let day_offset = (u16::from(utils::epoch_seconds_to_day_of_week(year_epoch_seconds)) + + days_to_month) + .rem_euclid(7); + + // EXAMPLE: + // + // 0 1 2 3 4 5 6 + // sun mon tue wed thu fri sat + // - - - 0 1 2 3 + // 4 5 6 7 8 9 10 + // 11 12 13 14 15 16 17 + // 18 19 20 21 22 23 24 + // 25 26 27 28 29 30 - + // + // The day_offset = 3, since the month starts on a wednesday. + // + // We're looking for the second friday of the month. Thus, since the month started before + // a friday, we need to start counting from week 0: + // + // day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (2 - 1) * 7 + 5 - 3 = 9 + // + // This works if the month started on a day before the day we want (day_offset <= day). 
However, if that's not the + // case, we need to start counting on week 1. For example, calculate the day of the month for the third monday + // of the month: + // + // day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (3 - 0) * 7 + 1 - 3 = 19 + + // Note: this day_of_month is zero-indexed! + let mut day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset; + + // Week 5 actually means "last of month". The day_of_month calculation + // above uses `week` directly; so we might end up spilling into the next month. In that + // case, we normalize to the fourth week of the month. + // + // Note that this only needs to be done once: every month has at least four of each + // day of the week, since all months have 28 days or more. + // + // We add one because day_of_month is zero-indexed + if day_of_month + 1 > days_in_month { + day_of_month -= 7 + } + + days_to_month + day_of_month + } + }; + + // Transition time is on local time, so we need to subtract the UTC offset to get the correct UTC timestamp + // for the transition. + year_epoch_seconds + i64::from(days) * 86400 + transition_date.time.0 - offset.0 +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum TransitionType { + Dst, + Std, +} + +impl TransitionType { + pub(crate) fn invert(&mut self) { + *self = match *self { + Self::Dst => Self::Std, + Self::Std => Self::Dst, + } + } +} + +/// The month, week of month, and day of week value built into the POSIX tz string. 
+/// +/// For more information, see the [POSIX tz string docs](https://sourceware.org/glibc/manual/2.40/html_node/Proleptic-TZ.html) +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct Mwd { + month: u8, + week: u8, + day: u8, +} + +impl Mwd { + #[cfg(feature = "tzif")] + pub(crate) fn from_u16(month: u16, week: u16, day: u16) -> Self { + Self::from_u8( + u8::try_from(month).unwrap_or(0), + u8::try_from(week).unwrap_or(0), + u8::try_from(day).unwrap_or(0), + ) + } + + pub(crate) fn from_u8(month: u8, week: u8, day: u8) -> Self { + Self { month, week, day } + } + + /// Given the day of the week of the 0th day in this month, + /// normalize the week to being a week number (1 = first week, ...) + /// rather than a weekday ordinal (1 = first friday, etc) + pub(crate) fn normalize_to_week_number(&mut self, day_of_week_zeroth_day: u8) { + if self.day <= day_of_week_zeroth_day { + self.week += 1; + } + } +} + +/// Represents an MWD for a given time +#[derive(Debug)] +pub(crate) struct MwdForTime { + /// This will never have day = 5 + pub(crate) mwd: Mwd, + /// The day of the week of the 0th day (the day before the month starts) + pub(crate) day_of_week_zeroth_day: u8, + /// This is the day of week of the 29th and the last day of the month, + /// if the month has more than 28 days. 
+ /// Basically, this is the start and end of the "fifth $weekday of the month" period + pub(crate) extra_days: Option<(u8, u8)>, +} + +impl MwdForTime { + #[cfg(any(feature = "tzif", feature = "experimental_tzif"))] + pub(crate) fn from_seconds(seconds: i64) -> Self { + let (year, month, day_of_month) = utils::ymd_from_epoch_milliseconds(seconds * 1_000); + let week_of_month = day_of_month / 7 + 1; + let day_of_week = utils::epoch_seconds_to_day_of_week(seconds); + let mut mwd = Mwd::from_u8(month, week_of_month, day_of_week); + let days_in_month = utils::iso_days_in_month(year, month); + let day_of_week_zeroth_day = + (i16::from(day_of_week) - i16::from(day_of_month)).rem_euclid(7) as u8; + mwd.normalize_to_week_number(day_of_week_zeroth_day); + if day_of_month > 28 { + let day_of_week_day_29 = (day_of_week_zeroth_day + 29).rem_euclid(7); + let day_of_week_last_day = (day_of_week_zeroth_day + days_in_month).rem_euclid(7); + Self { + mwd, + day_of_week_zeroth_day, + extra_days: Some((day_of_week_day_29, day_of_week_last_day)), + } + } else { + // No day 5 + Self { + mwd, + day_of_week_zeroth_day, + extra_days: None, + } + } + } + + /// MWDs from Posix data can contain `w=5`, which means the *last* $weekday of the month, + /// not the 5th. For MWDs in the same month, this normalizes the 5 to the actual number of the + /// last weekday of the month (5 or 4) + /// + /// Furthermore, this turns the week number into a true week number: the "second friday in March" + /// will be turned into "the friday in the first week of March" or "the Friday in the second week of March" + /// depending on when March starts. + /// + /// This normalization *only* applies to MWDs in the same month. For other MWDs, such normalization is irrelevant. 
+ pub(crate) fn normalize_mwd(&self, other: &mut Mwd) { + // If we're in the same month, normalization will actually have a useful effect + if self.mwd.month == other.month { + // First normalize MWDs that are like "the last $weekday in the month"; + // these need special handling + if other.week == 5 { + if let Some((day_29, last_day)) = self.extra_days { + if day_29 < last_day { + if other.day < day_29 || other.day > last_day { + // This day isn't found in the last week. Subtract one. + other.week = 4; + } + } else { + // The extra part of the month crosses Sunday + if other.day < day_29 && other.day > last_day { + // This day isn't found in the last week. Subtract one. + other.week = 4; + } + } + } else { + // There is no week 5 in this month, normalize to 4 + other.week = 4; + } + } + + other.normalize_to_week_number(self.day_of_week_zeroth_day); + } + } +} + +pub(crate) fn offset_range(offset_one: i64, offset_two: i64) -> core::ops::Range { + if offset_one < offset_two { + return offset_one..offset_two; + } + offset_two..offset_one +} + +#[derive(Debug)] +pub(crate) enum TransitionKind { + // The offsets didn't change (happens when abbreviations/savings values change) + Smooth, + // The offsets changed in a way that leaves a gap + Gap, + // The offsets changed in a way that produces overlapping time. 
+ Overlap, +} diff --git a/provider/src/epoch_nanoseconds.rs b/provider/src/epoch_nanoseconds.rs index 4dcd9fbc7..302958e5d 100644 --- a/provider/src/epoch_nanoseconds.rs +++ b/provider/src/epoch_nanoseconds.rs @@ -57,7 +57,11 @@ impl From for EpochNanoseconds { } #[inline] -#[cfg(any(feature = "tzif", feature = "zoneinfo64"))] +#[cfg(any( + feature = "tzif", + feature = "zoneinfo64", + feature = "experimental_tzif" +))] pub(crate) fn seconds_to_nanoseconds(seconds: i64) -> i128 { seconds as i128 * NS_IN_S } diff --git a/provider/src/experimental_tzif/mod.rs b/provider/src/experimental_tzif/mod.rs index 43b18e918..354d5686d 100644 --- a/provider/src/experimental_tzif/mod.rs +++ b/provider/src/experimental_tzif/mod.rs @@ -7,15 +7,21 @@ #[cfg(feature = "datagen")] mod datagen; pub mod posix; +pub mod provider; use zerotrie::ZeroAsciiIgnoreCaseTrie; use zerovec::{vecs::Index32, VarZeroVec, ZeroVec}; use posix::PosixZone; +use provider::ZeroCompiledZoneInfo; -use crate as timezone_provider; +use crate::{self as timezone_provider, provider::NormalizerAndResolver, CompiledNormalizer}; compiled_zoneinfo_provider!(COMPILED_ZONEINFO_PROVIDER); +/// `ZeroCompiledTzdbProvider` is a zero-copy compiled time zone database provider. 
+pub type ZeroCompiledTzdbProvider<'a> = + NormalizerAndResolver; + #[derive(Debug, Clone)] #[cfg_attr( feature = "datagen", @@ -62,7 +68,7 @@ pub struct ZeroTzif<'data> { } #[zerovec::make_ule(LocalTimeRecordULE)] -#[derive(PartialEq, Eq, Debug, Clone, Copy, PartialOrd, Ord)] +#[derive(PartialEq, Eq, Default, Debug, Clone, Copy, PartialOrd, Ord)] #[cfg_attr( feature = "datagen", derive(yoke::Yokeable, serde::Serialize, databake::Bake) diff --git a/provider/src/experimental_tzif/posix.rs b/provider/src/experimental_tzif/posix.rs index 04e67f9c3..deca061c5 100644 --- a/provider/src/experimental_tzif/posix.rs +++ b/provider/src/experimental_tzif/posix.rs @@ -1,3 +1,13 @@ +use crate::{ + common::{ + offset_range, DstTransitionInfoForYear, LocalTimeRecordResult, Mwd, MwdForTime, + TimeZoneTransitionInfo, TransitionType, + }, + epoch_nanoseconds::EpochNanoseconds, + provider::{GapEntryOffsets, TimeZoneProviderResult, UtcOffsetSeconds}, + utils, TimeZoneProviderError, +}; + use tinystr::TinyAsciiStr; #[cfg(feature = "datagen")] use zoneinfo_rs::posix::{MonthWeekDay, PosixDate, PosixDateTime, PosixTimeZone, PosixTransition}; @@ -15,6 +25,329 @@ pub struct PosixZone { pub transition: Option, } +impl PosixZone { + pub(crate) fn resolve_for_local_seconds( + &self, + local_seconds: i64, + ) -> TimeZoneProviderResult { + let Some(transition_info) = &self.transition else { + // Regardless of the time, there is one variant and we can return it. 
+ let offset = UtcOffsetSeconds(self.offset); + return Ok(LocalTimeRecordResult::Single(offset)); + }; + + // NOTE: + // STD -> DST == start + // DST -> STD == end + let (is_transition_day, mut is_dst) = cmp_seconds_to_transitions( + &transition_info.start.date, + &transition_info.end.date, + local_seconds, + )?; + + if is_transition_day { + let time = utils::epoch_ms_to_ms_in_day(local_seconds * 1_000) as i64 / 1_000; + let transition_time = if is_dst == TransitionType::Dst { + transition_info.start.time + } else { + transition_info.end.time + }; + // Convert to UtcOffsetSeconds so that these behave like + // normal offsets + let std = UtcOffsetSeconds(self.offset); + let dst = UtcOffsetSeconds(self.offset + transition_info.savings); + let transition_diff = if is_dst == TransitionType::Dst { + dst.0 - std.0 + } else { + std.0 - dst.0 + }; + let offset = offset_range(transition_time + transition_diff, transition_time); + match offset.contains(&time) { + true if is_dst == TransitionType::Dst => { + return Ok(LocalTimeRecordResult::Empty(GapEntryOffsets { + offset_before: std, + offset_after: dst, + transition_epoch: EpochNanoseconds::from_seconds(transition_time), + })); + } + true => { + // Note(nekevss, manishearth): We may need to more carefully + // handle inverse DST here. + return Ok(LocalTimeRecordResult::Ambiguous { + first: dst, + second: std, + }); + } + _ => {} + } + + // We were not contained in the transition above, + // AND we are before it, which means we are actually in + // the other transition! + // + // NOTE(Manishearth) do we need to do anything special + // here if we end up back at the tzif transition data? 
+ if time < offset.start { + is_dst.invert(); + } + } + + match is_dst { + TransitionType::Dst => { + Ok(UtcOffsetSeconds(self.offset + transition_info.savings).into()) + } + TransitionType::Std => Ok(UtcOffsetSeconds(self.offset).into()), + } + } + + pub(crate) fn resolve_for_epoch_seconds( + &self, + epoch_seconds: i64, + ) -> TimeZoneProviderResult { + let Some(dst_transition_info) = &self.transition else { + // Regardless of the time, there is one variant and we can return it. + return Ok(TimeZoneTransitionInfo { + transition_epoch: None, + offset: UtcOffsetSeconds(self.offset), + }); + }; + + let year = utils::epoch_time_to_iso_year(epoch_seconds * 1000); + + let transition_info = DstTransitionInfoForYear::compute_zero_transition( + self.offset, + dst_transition_info, + year, + ); + let dst_start_seconds = transition_info.dst_start_seconds; + let dst_end_seconds = transition_info.dst_end_seconds; + + // Need to determine if the range being tested is standard or savings time. + let dst_is_inversed = dst_end_seconds < dst_start_seconds; + + // We potentially have two different variations of the DST start and end time. + // + // Northern hemisphere: dst_start -> dst_end + // Southern hemisphere: dst_end -> dst_start + // + // This is primarily due to the summer / winter months of those areas. + // + // For the northern hemisphere, we can check if the range contains the seconds. For the + // southern hemisphere, we check if the range does not contain the value. + let should_return_dst = (!dst_is_inversed + && (dst_start_seconds..dst_end_seconds).contains(&epoch_seconds)) + || (dst_is_inversed && !(dst_end_seconds..dst_start_seconds).contains(&epoch_seconds)); + + // Expanding on the above, the state of time zones in the year are: + // + // Northern hemisphere: STD -> DST -> STD + // Southern hemisphere: DST -> STD -> DST + // + // This is simple for the returning the offsets, but if the seconds value falls into the first + // available rule. 
However, the northern hemisphere's first STD rule and the Southern hemisphere's + // first DST rule will have different transition times that are based in the year prior, so if the + // requested seconds falls in that range, we calculate the transition time for the prior year. + let (new_offset, transition_epoch) = if should_return_dst { + let transition_epoch = if dst_is_inversed && epoch_seconds < dst_end_seconds { + Some(calculate_transition_seconds_for_year( + year - 1, + dst_transition_info.start, + transition_info.dst_offset, + )) + } else { + Some(dst_start_seconds) + }; + (transition_info.dst_offset, transition_epoch) + } else { + let transition_epoch = if !dst_is_inversed && epoch_seconds < dst_start_seconds { + Some(calculate_transition_seconds_for_year( + year - 1, + dst_transition_info.end, + transition_info.std_offset, + )) + } else { + Some(dst_end_seconds) + }; + (transition_info.std_offset, transition_epoch) + }; + Ok(TimeZoneTransitionInfo { + offset: new_offset, + transition_epoch, + }) + } +} + +impl DstTransitionInfoForYear { + pub(crate) fn compute_zero_transition( + std_offset_seconds: i64, + dst_transition: &ZeroPosixTransition, + year: i32, + ) -> Self { + let std_offset = UtcOffsetSeconds(std_offset_seconds); + let dst_offset = UtcOffsetSeconds(std_offset_seconds + dst_transition.savings); + let dst_start_seconds = + calculate_transition_seconds_for_year(year, dst_transition.start, std_offset); + let dst_end_seconds = + calculate_transition_seconds_for_year(year, dst_transition.end, dst_offset); + Self { + dst_start_seconds, + dst_end_seconds, + std_offset, + dst_offset, + } + } +} + +fn calculate_transition_seconds_for_year( + year: i32, + transition_dt: ZeroTransitionDateTime, + offset: UtcOffsetSeconds, +) -> i64 { + // Determine the year of the requested time. 
+ let year_epoch_seconds = i64::from(utils::epoch_days_for_year(year)) * 86400; + let is_leap = utils::is_leap(year); + + // Calculate the days in the year for the TransitionDate + // This value is zero-indexed so it can be added to the year's epoch seconds + let days = match transition_dt.date { + ZeroTransitionDate { + kind: DateKind::JulianNoLeap, + day: Some(day), + .. + } => day, + ZeroTransitionDate { + kind: DateKind::Julian, + day: Some(day), + .. + } => day + 1, + ZeroTransitionDate { + kind: DateKind::MonthWeekDay, + mwd: Some((month, week, day)), + .. + } => { + let days_to_month = utils::month_to_day(month - 1, is_leap); + let days_in_month = utils::iso_days_in_month(year, month); + + // Month starts in the day... + let day_offset = (u16::from(utils::epoch_seconds_to_day_of_week(year_epoch_seconds)) + + days_to_month) + .rem_euclid(7) as u8; + + // EXAMPLE: + // + // 0 1 2 3 4 5 6 + // sun mon tue wed thu fri sat + // - - - 0 1 2 3 + // 4 5 6 7 8 9 10 + // 11 12 13 14 15 16 17 + // 18 19 20 21 22 23 24 + // 25 26 27 28 29 30 - + // + // The day_offset = 3, since the month starts on a wednesday. + // + // We're looking for the second friday of the month. Thus, since the month started before + // a friday, we need to start counting from week 0: + // + // day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (2 - 1) * 7 + 5 - 3 = 9 + // + // This works if the month started on a day before the day we want (day_offset <= day). However, if that's not the + // case, we need to start counting on week 1. For example, calculate the day of the month for the third monday + // of the month: + // + // day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (3 - 0) * 7 + 1 - 3 = 19 + + // Note: this day_of_month is zero-indexed! + let mut day_of_month = (week - u8::from(day_offset <= day)) * 7 + day - day_offset; + + // Week 5 actually means "last of month". 
The day_of_month calculation + // above uses `week` directly; so we might end up spilling into the next month. In that + // case, we normalize to the fourth week of the month. + // + // Note that this only needs to be done once: every month has at least four of each + // day of the week, since all months have 28 days or more. + // + // We add one because day_of_month is zero-indexed + if day_of_month + 1 > days_in_month { + day_of_month -= 7 + } + + days_to_month + day_of_month as u16 + } + _ => panic!("Invalid TransitionDate found."), + }; + + // Transition time is on local time, so we need to subtract the UTC offset to get the correct UTC timestamp + // for the transition. + year_epoch_seconds + i64::from(days) * 86400 + transition_dt.time - offset.0 +} + +fn cmp_seconds_to_transitions( + start: &ZeroTransitionDate, + end: &ZeroTransitionDate, + seconds: i64, +) -> TimeZoneProviderResult<(bool, TransitionType)> { + // Assert the kinds are equal + assert_eq!(start.kind, end.kind); + + let cmp_result = match (start.to_enum(), end.to_enum()) { + ( + TransitionDate::Mwd((start_month, start_week, start_day)), + TransitionDate::Mwd((end_month, end_week, end_day)), + ) => { + let mwd = MwdForTime::from_seconds(seconds); + let mut start = Mwd::from_u8(start_month, start_week, start_day); + let mut end = Mwd::from_u8(end_month, end_week, end_day); + + mwd.normalize_mwd(&mut start); + mwd.normalize_mwd(&mut end); + + let is_transition = start == mwd.mwd || end == mwd.mwd; + let is_dst = if start > end { + mwd.mwd < end || start <= mwd.mwd + } else { + start <= mwd.mwd && mwd.mwd < end + }; + + (is_transition, is_dst) + } + (TransitionDate::Julian(start), TransitionDate::Julian(end)) => { + let day_in_year = utils::epoch_time_to_day_in_year(seconds * 1_000) as u16; + let is_transition = start == day_in_year || end == day_in_year; + let is_dst = if start > end { + day_in_year < end || start <= day_in_year + } else { + start <= day_in_year && day_in_year < end + }; + 
(is_transition, is_dst) + } + // TODO: do we need to modify the logic for leap years? + (TransitionDate::JulianNoLeap(start), TransitionDate::JulianNoLeap(end)) => { + let day_in_year = utils::epoch_time_to_day_in_year(seconds * 1_000) as u16; + let is_transition = start == day_in_year || end == day_in_year; + let is_dst = if start > end { + day_in_year < end || start <= day_in_year + } else { + start <= day_in_year && day_in_year < end + }; + (is_transition, is_dst) + } + // NOTE: The assumption here is that mismatched day types on + // a POSIX string is an illformed string. + _ => { + return Err(TimeZoneProviderError::Assert( + "Mismatched day types on a POSIX string.", + )) + } + }; + + match cmp_result { + (true, dst) if dst => Ok((true, TransitionType::Dst)), + (true, _) => Ok((true, TransitionType::Std)), + (false, dst) if dst => Ok((false, TransitionType::Dst)), + (false, _) => Ok((false, TransitionType::Std)), + } +} + #[cfg(feature = "datagen")] #[allow(clippy::unwrap_used, reason = "Datagen only")] impl From<&PosixTimeZone> for PosixZone { @@ -89,6 +422,13 @@ impl From<&PosixDateTime> for ZeroTransitionDateTime { } } +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum TransitionDate { + Julian(u16), + JulianNoLeap(u16), + Mwd((u8, u8, u8)), +} + #[zerovec::make_ule(DateULE)] #[derive(PartialEq, Eq, Debug, Clone, Copy, PartialOrd, Ord)] #[cfg_attr( @@ -102,6 +442,29 @@ pub struct ZeroTransitionDate { pub mwd: Option<(u8, u8, u8)>, } +impl ZeroTransitionDate { + pub(crate) fn to_enum(self) -> TransitionDate { + match self { + ZeroTransitionDate { + kind: DateKind::JulianNoLeap, + day: Some(day), + .. + } => TransitionDate::JulianNoLeap(day), + ZeroTransitionDate { + kind: DateKind::Julian, + day: Some(day), + .. + } => TransitionDate::Julian(day), + ZeroTransitionDate { + kind: DateKind::MonthWeekDay, + mwd: Some(mwd), + .. 
+ } => TransitionDate::Mwd(mwd), + _ => panic!("Invalid ZeroTransitionDate"), + } + } +} + #[cfg(feature = "datagen")] impl From for ZeroTransitionDate { fn from(value: PosixDate) -> Self { diff --git a/provider/src/experimental_tzif/provider.rs b/provider/src/experimental_tzif/provider.rs new file mode 100644 index 000000000..5c78d9a35 --- /dev/null +++ b/provider/src/experimental_tzif/provider.rs @@ -0,0 +1,604 @@ +//! This module contains the core logic for the zero-copy tzif provider. + +use core::{cmp::Ordering, ops::Range}; + +use super::COMPILED_ZONEINFO_PROVIDER; + +use crate::{ + common::{ + DstTransitionInfoForYear, LocalTimeRecordResult, TimeZoneTransitionInfo, TransitionKind, + }, + epoch_nanoseconds::{seconds_to_nanoseconds, EpochNanoseconds, NS_IN_S}, + experimental_tzif::{LocalTimeRecord, ZeroTzif}, + provider::{ + CandidateEpochNanoseconds, EpochNanosecondsAndOffset, GapEntryOffsets, ResolvedId, + TimeZoneProviderResult, TimeZoneResolver, TransitionDirection, UtcOffsetSeconds, + }, + utils, TimeZoneProviderError, +}; +use zerofrom::ZeroFrom; + +#[derive(Debug, Default)] +pub struct ZeroCompiledZoneInfo; + +impl ZeroCompiledZoneInfo { + pub fn zero_tzif(&self, resolved_id: ResolvedId) -> TimeZoneProviderResult> { + COMPILED_ZONEINFO_PROVIDER + .tzifs + .get(resolved_id.0) + .map(ZeroTzif::zero_from) + .ok_or(TimeZoneProviderError::Range( + "tzif data not found for resolved id", + )) + } +} + +impl TimeZoneResolver for ZeroCompiledZoneInfo { + fn get_id(&self, normalized_identifier: &[u8]) -> TimeZoneProviderResult { + COMPILED_ZONEINFO_PROVIDER + .ids + .get(normalized_identifier) + .map(ResolvedId) + .ok_or(TimeZoneProviderError::Range("identifier does not exist.")) + } + + fn candidate_nanoseconds_for_local_epoch_nanoseconds( + &self, + identifier: ResolvedId, + local_datetime: crate::provider::IsoDateTime, + ) -> TimeZoneProviderResult { + let tzif = self.zero_tzif(identifier)?; + + let epoch_nanos = (local_datetime).as_nanoseconds(); + let mut 
seconds = (epoch_nanos.0 / NS_IN_S) as i64; + + // We just rounded our ns value to seconds. + // This is fine for positive ns: timezones do not transition at sub-second offsets, + // so the offset at N seconds is always the offset at N.0001 seconds. + // + // However, for negative epochs, the offset at -N seconds might be different + // from that at -N.001 seconds. Instead, we calculate the offset at (-N-1) seconds. + if seconds < 0 { + let remainder = epoch_nanos.0 % NS_IN_S; + if remainder != 0 { + seconds -= 1; + } + } + + let local_time_record_result = tzif.search_candidate_offset(seconds)?; + let result = match local_time_record_result { + LocalTimeRecordResult::Empty(bounds) => CandidateEpochNanoseconds::Zero(bounds), + LocalTimeRecordResult::Single(r) => { + let epoch_ns = EpochNanoseconds::from(epoch_nanos.0 - seconds_to_nanoseconds(r.0)); + CandidateEpochNanoseconds::One(EpochNanosecondsAndOffset { + ns: epoch_ns, + offset: r, + }) + } + LocalTimeRecordResult::Ambiguous { first, second } => { + let first_epoch_ns = + EpochNanoseconds::from(epoch_nanos.0 - seconds_to_nanoseconds(first.0)); + let second_epoch_ns = + EpochNanoseconds::from(epoch_nanos.0 - seconds_to_nanoseconds(second.0)); + CandidateEpochNanoseconds::Two([ + EpochNanosecondsAndOffset { + ns: first_epoch_ns, + offset: first, + }, + EpochNanosecondsAndOffset { + ns: second_epoch_ns, + offset: second, + }, + ]) + } + }; + Ok(result) + } + + fn transition_nanoseconds_for_utc_epoch_nanoseconds( + &self, + identifier: ResolvedId, + epoch_nanoseconds: i128, + ) -> TimeZoneProviderResult { + let tzif = self.zero_tzif(identifier)?; + + let mut seconds = (epoch_nanoseconds / NS_IN_S) as i64; + // The rounding is inexact. Transitions are only at second + // boundaries, so the offset at N s is the same as the offset at N.001, + // but the offset at -Ns is not the same as the offset at -N.001, + // the latter matches -N - 1 s instead. 
+ if seconds < 0 && epoch_nanoseconds % NS_IN_S != 0 { + seconds -= 1; + } + tzif.get(seconds).map(|t| t.offset) + } + + fn get_time_zone_transition( + &self, + identifier: ResolvedId, + epoch_nanoseconds: i128, + direction: TransitionDirection, + ) -> TimeZoneProviderResult> { + let tzif = self.zero_tzif(identifier)?; + tzif.get_time_zone_transition(epoch_nanoseconds, direction) + } +} + +impl LocalTimeRecord { + pub fn as_utc_offset_seconds(&self) -> UtcOffsetSeconds { + UtcOffsetSeconds(self.offset) + } +} + +// TODO (nekevss): It would be nice to unify these in the `tzif` crate so that the tzif logic +// is centralized whether using the "zero" or normal version. + +impl<'data> ZeroTzif<'data> { + pub fn get_first_time_zone_offset(&self) -> TimeZoneTransitionInfo { + let offset = self.types.first(); + debug_assert!(offset.is_some(), "tzif internal invariant violated"); + TimeZoneTransitionInfo { + // There was no transition into the first timezone + transition_epoch: None, + offset: offset.unwrap_or_default().as_utc_offset_seconds(), + } + } + + pub fn get_time_zone_offset(&self, idx: usize) -> TimeZoneTransitionInfo { + // NOTE: Transition type can be empty. If no transition_type exists, + // then use 0 as the default index of local_time_type_records. + let offset = self + .types + .get(self.transition_types.get(idx).unwrap_or(0) as usize); + debug_assert!(offset.is_some(), "tzif internal invariant violated"); + TimeZoneTransitionInfo { + transition_epoch: self.transitions.get(idx), + offset: offset.unwrap_or_default().as_utc_offset_seconds(), + } + } + + pub fn get(&self, epoch_seconds: i64) -> TimeZoneProviderResult { + let result = self.transitions.binary_search(&epoch_seconds); + + match result { + // The transition time was given. 
The transition entries *start* at their + // transition time, so we use the same index + Ok(idx) => Ok(self.get_time_zone_offset(idx)), + // TODO: Double check how the below is handled by zoneinfo_rs + // + // If there are no transitions, local time for all timestamps is specified by the TZ + // string in the footer if present and nonempty; otherwise, it is + // specified by time type 0. + Err(_) if self.transitions.is_empty() => { + if self.types.len() == 1 { + let local_time_record = self + .types + .first() + .ok_or(TimeZoneProviderError::Assert("Out of transition range"))?; + let offset = local_time_record.as_utc_offset_seconds(); + Ok(TimeZoneTransitionInfo { + offset, + transition_epoch: None, + }) + } else { + // Resolve the POSIX time zone. + self.posix.resolve_for_epoch_seconds(epoch_seconds) + } + } + // Our time is before the first transition. + // Get the first timezone offset + Err(0) => Ok(self.get_first_time_zone_offset()), + // Our time is after some transition. + Err(idx) => { + if self.transitions.len() <= idx { + // The transition time provided is beyond the length of + // the available transition time, so the time zone is + // resolved with the POSIX tz string. + let mut offset = self.posix.resolve_for_epoch_seconds(epoch_seconds)?; + if offset.transition_epoch.is_none() { + offset.transition_epoch = Some( + self.transitions + .get(idx - 1) + .ok_or(TimeZoneProviderError::Assert("Out of transition range"))?, + ) + } + return Ok(offset); + } + // binary_search returns the insertion index, which is one after the + // index of the closest lower bound. Fetch that bound. 
+ Ok(self.get_time_zone_offset(idx - 1)) + } + } + } + + pub fn search_candidate_offset( + &self, + local_seconds: i64, + ) -> TimeZoneProviderResult { + let b_search_result = self.transitions.binary_search(&local_seconds); + + let mut estimated_idx = match b_search_result { + Ok(idx) => idx, + Err(idx) => idx, + }; + + if estimated_idx + 1 >= self.transitions.len() { + // If we are *well past* the last transition time, we want + // to use the posix tz string + let mut use_posix = true; + if !self.transitions.is_empty() { + // In case idx was out of bounds, bring it back in + estimated_idx = self.transitions.len() - 1; + let transition_info = self.get_transition_info(estimated_idx); + + // I'm not fully sure if this is correct. + // Is the next_offset valid for the last transition time in its + // vicinity? Probably? It does not seem pleasant to try and do this + // math using half of the transition info and half of the posix info. + // + // TODO(manishearth, nekevss): https://github.com/boa-dev/temporal/issues/469 + if transition_info.transition_time_prev_epoch() > local_seconds + || transition_info.transition_time_next_epoch() > local_seconds + { + // We're before the transition fully ends; we should resolve + // with the regular transition time instead of use_posix + use_posix = false; + } + } + if use_posix { + // The transition time provided is beyond the length of + // the available transition time, so the time zone is + // resolved with the POSIX tz string. 
+ return self.posix.resolve_for_local_seconds(local_seconds); + } + } + + debug_assert!(estimated_idx < self.transitions.len()); + + let transition_info = self.get_transition_info(estimated_idx); + + let range = transition_info.offset_range_local(); + + if range.contains(&local_seconds) { + return Ok(transition_info.record_for_contains()); + } else if local_seconds < range.start { + if estimated_idx == 0 { + // We're at the beginning, there are no timezones before us + // So we just return the first offset + return Ok(LocalTimeRecordResult::Single( + transition_info.prev.as_utc_offset_seconds(), + )); + } + // Otherwise, try the previous offset + estimated_idx -= 1; + } else { + if estimated_idx + 1 == self.transitions.len() { + // We're at the end, return posix instead + return self.posix.resolve_for_local_seconds(local_seconds); + } + // Otherwise, try the next offset + estimated_idx += 1; + } + + let transition_info = self.get_transition_info(estimated_idx); + let range = transition_info.offset_range_local(); + + if range.contains(&local_seconds) { + Ok(transition_info.record_for_contains()) + } else if local_seconds < range.start { + // Note that get_transition_info will correctly fetch the first offset + // into .prev when working with the first transition. 
+ Ok(LocalTimeRecordResult::Single( + transition_info.prev.as_utc_offset_seconds(), + )) + } else { + // We're at the end, return posix instead + if estimated_idx + 1 == self.transitions.len() { + return self.posix.resolve_for_local_seconds(local_seconds); + } + Ok(LocalTimeRecordResult::Single( + transition_info.next.as_utc_offset_seconds(), + )) + } + } + + pub fn get_time_zone_transition( + &self, + epoch_nanoseconds: i128, + direction: TransitionDirection, + ) -> TimeZoneProviderResult> { + // First search the tzif data + + let epoch_seconds = (epoch_nanoseconds / NS_IN_S) as i64; + // When *exactly* on a transition the spec wants you to + // get the next one, so it's important to know if we are + // actually on epoch_seconds or a couple nanoseconds before/after it + // to handle the exact match case + let seconds_is_exact = (epoch_nanoseconds % NS_IN_S) == 0; + let seconds_is_negative = epoch_nanoseconds < 0; + + let b_search_result = self.transitions.binary_search(&epoch_seconds); + + let mut transition_idx = match b_search_result { + Ok(idx) => { + match (direction, seconds_is_exact, seconds_is_negative) { + // If we are N.001 for negative N, the next transition is idx + (TransitionDirection::Next, false, true) => idx, + // If we are exactly N, or N.001 for positive N, the next transition is idx + 1 + (TransitionDirection::Next, true, _) + | (TransitionDirection::Next, false, false) => idx + 1, + // If we are N.001 for positive N, the previous transition is the one at idx (= N) + (TransitionDirection::Previous, false, false) => idx, + // If we are exactly N, or N.0001 for negative N, the previous transition is idx - 1 + (TransitionDirection::Previous, true, _) + | (TransitionDirection::Previous, false, true) => { + if let Some(idx) = idx.checked_sub(1) { + idx + } else { + // If we found the first transition, there is no previous one, + // return None + return Ok(None); + } + } + } + } + // idx is insertion index here, so it is the index of the closest upper +
// transition + Err(idx) => match direction { + TransitionDirection::Next => idx, + // Special case, we're after the end of the array, we want to make + // sure we hit the POSIX tz handling and we should not subtract one. + TransitionDirection::Previous if idx == self.transitions.len() => idx, + TransitionDirection::Previous => { + // Go one previous + if let Some(idx) = idx.checked_sub(1) { + idx + } else { + // If we found the first transition, there is no previous one, + // return None + return Ok(None); + } + } + }, + }; + + while let Some(tzif_transition) = self.maybe_get_transition_info(transition_idx) { + // This is not a real transition. Skip it. + if tzif_transition.prev.offset == tzif_transition.next.offset { + match direction { + TransitionDirection::Next => transition_idx += 1, + TransitionDirection::Previous if transition_idx == 0 => return Ok(None), + TransitionDirection::Previous => transition_idx -= 1, + } + } else { + return Ok(Some(EpochNanoseconds::from_seconds( + tzif_transition.transition_time, + ))); + } + } + + // We went past the Tzif transitions. We need to handle the posix time zone instead. + let posix = self.posix; + + // The last transition in the tzif tables. + // We should not go back beyond this + let last_tzif_transition = self.transitions.last(); + + // We need to do a similar backwards iteration to find the last real transition. + // Do it only when needed, this case will only show up when walking Previous for a date + // just after the last tzif transition but before the first posix transition. 
+ let last_real_tzif_transition = || { + debug_assert!(direction == TransitionDirection::Previous); + for last_transition_idx in (0..self.transitions.len()).rev() { + if let Some(tzif_transition) = self.maybe_get_transition_info(last_transition_idx) { + if tzif_transition.prev.offset == tzif_transition.next.offset { + continue; + } + return Some(tzif_transition.transition_time); + } + break; + } + None + }; + + let Some(dst_variant) = &self.posix.transition else { + // There are no further transitions. + match direction { + TransitionDirection::Next => return Ok(None), + TransitionDirection::Previous => { + // Go back to the last tzif transition + if last_tzif_transition.is_some() { + if let Some(last_real_tzif_transition) = last_real_tzif_transition() { + return Ok(Some(EpochNanoseconds::from_seconds( + last_real_tzif_transition, + ))); + } + } + return Ok(None); + } + } + }; + + // Calculate year, but clamp it to the last transition + // We do not want to try and apply the posix string to earlier years! + // + // Antarctica/Troll is an example of a timezone that has a posix string + // but no meaningful previous transitions. + let mut epoch_seconds_for_year_calculation = epoch_seconds; + if let Some(last_tzif_transition) = last_tzif_transition { + if epoch_seconds < last_tzif_transition { + epoch_seconds_for_year_calculation = last_tzif_transition; + } + } + let year = utils::epoch_time_to_iso_year(epoch_seconds_for_year_calculation * 1000); + + let transition_info = + DstTransitionInfoForYear::compute_zero_transition(posix.offset, dst_variant, year); + + let range = transition_info.transition_range(); + + let mut seconds = match direction { + TransitionDirection::Next => { + // Inexact seconds in the negative case means that (seconds == foo) is actually + // seconds < foo + // + // This code will likely not actually be hit: the current Tzif database has no + // entries with DST offset posix strings where the posix string starts + // before the unix epoch.
+ let seconds_is_inexact_for_negative = seconds_is_negative && !seconds_is_exact; + // We're before the first transition + if epoch_seconds < range.start + || (epoch_seconds == range.start && seconds_is_inexact_for_negative) + { + range.start + } else if epoch_seconds < range.end + || (epoch_seconds == range.end && seconds_is_inexact_for_negative) + { + // We're between the first and second transition + range.end + } else { + // We're after the second transition + let transition_info = DstTransitionInfoForYear::compute_zero_transition( + posix.offset, + dst_variant, + year + 1, + ); + + transition_info.transition_range().start + } + } + TransitionDirection::Previous => { + // Inexact seconds in the positive case means that (seconds == foo) is actually + // seconds > foo + let seconds_is_ineexact_for_positive = !seconds_is_negative && !seconds_is_exact; + // We're after the second transition + // (note that seconds_is_exact means that epoch_seconds == range.end actually means equality) + if epoch_seconds > range.end + || (epoch_seconds == range.end && seconds_is_ineexact_for_positive) + { + range.end + } else if epoch_seconds > range.start + || (epoch_seconds == range.start && seconds_is_ineexact_for_positive) + { + // We're after the first transition + range.start + } else { + // We're before the first transition + let transition_info = DstTransitionInfoForYear::compute_zero_transition( + posix.offset, + dst_variant, + year - 1, + ); + + transition_info.transition_range().end + } + } + }; + + if let Some(last_tzif_transition) = last_tzif_transition { + // When going Previous, we went back into the area of tzif transition + if seconds < last_tzif_transition { + if let Some(last_real_tzif_transition) = last_real_tzif_transition() { + seconds = last_real_tzif_transition; + } else { + return Ok(None); + } + } + } + + Ok(Some(EpochNanoseconds::from_seconds(seconds))) + } + + fn get_transition_info(&self, idx: usize) -> TransitionInfo { + let info = 
self.maybe_get_transition_info(idx); + debug_assert!(info.is_some(), "tzif internal invariant violated"); + info.unwrap_or_default() + } + + fn maybe_get_transition_info(&self, idx: usize) -> Option { + let next = self.get_local_time_record(idx); + let transition_time = self.transitions.get(idx)?; + let prev = if idx == 0 { + self.types.first()? + } else { + self.get_local_time_record(idx - 1) + }; + Some(TransitionInfo { + prev, + next, + transition_time, + }) + } + + fn get_local_time_record(&self, idx: usize) -> super::LocalTimeRecord { + // NOTE: Transition type can be empty. If no transition_type exists, + // then use 0 as the default index of local_time_type_records. + let idx = self.transition_types.get(idx).unwrap_or(0); + + let get = self.types.get(idx as usize); + debug_assert!(get.is_some(), "tzif internal invariant violated"); + get.unwrap_or_default() + } +} + +// TODO: Unify with `tzif.rs`'s `TransitionInfo` +#[derive(Debug, Default)] +pub struct TransitionInfo { + pub next: LocalTimeRecord, + pub prev: LocalTimeRecord, + pub transition_time: i64, +} + +impl TransitionInfo { + fn transition_time_prev_epoch(&self) -> i64 { + self.transition_time + self.prev.offset + } + + fn transition_time_next_epoch(&self) -> i64 { + self.transition_time + self.next.offset + } + + /// Gets the range of local times where this transition is active + /// + /// Note that this will always be start..end, NOT prev..next: if the next + /// offset is before prev (e.g. for a TransitionKind::Overlap), + /// it will be next..prev. + /// + /// You should use .kind() to understand how to interpret this + fn offset_range_local(&self) -> Range { + let prev = self.transition_time_prev_epoch(); + let next = self.transition_time_next_epoch(); + match self.kind() { + TransitionKind::Overlap => next..prev, + _ => prev..next, + } + } + + /// What is the kind of the transition?
+ fn kind(&self) -> TransitionKind { + match self.prev.offset.cmp(&self.next.offset) { + Ordering::Less => TransitionKind::Gap, + Ordering::Greater => TransitionKind::Overlap, + Ordering::Equal => TransitionKind::Smooth, + } + } + + /// If a time is found to be within self.offset_range_local(), + /// what is the corresponding LocalTimeRecordResult? + fn record_for_contains(&self) -> LocalTimeRecordResult { + match self.kind() { + TransitionKind::Gap => LocalTimeRecordResult::Empty(GapEntryOffsets { + offset_before: self.prev.as_utc_offset_seconds(), + offset_after: self.next.as_utc_offset_seconds(), + transition_epoch: EpochNanoseconds::from_seconds(self.transition_time), + }), + TransitionKind::Overlap => LocalTimeRecordResult::Ambiguous { + first: self.prev.as_utc_offset_seconds(), + second: self.next.as_utc_offset_seconds(), + }, + TransitionKind::Smooth => { + LocalTimeRecordResult::Single(self.prev.as_utc_offset_seconds()) + } + } + } +} diff --git a/provider/src/lib.rs b/provider/src/lib.rs index a910c90e8..431319d55 100644 --- a/provider/src/lib.rs +++ b/provider/src/lib.rs @@ -19,10 +19,10 @@ //! - `ZoneInfo64TzdbProvider`: a provider using ICU4C's zoneinfo64 resource bundle (enable with `zoneinfo64` features flag) //! - `FsTzdbProvider`: a provider that reads and parses tzdata at runtime from the host file system's //! TZif files (enable with `tzif` feature flag) -//! - `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZif's compiled +//! - `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZifs compiled //! into the application (enable with `tzif` feature flag) -//! -//! Coming soon (hopefully), a zero copy compiled tzdb provider (see `experimental_tzif` for more). +//! - `ZeroCompiledTzdbProvider`: a provider that deserializes time zone data from TZifs compiled +//! into the application (enable with `experimental_tzif` feature flag) //! //! ## Time zone provider traits //! 
@@ -87,6 +87,9 @@ pub mod zoneinfo64; pub mod epoch_nanoseconds; +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] +pub(crate) mod common; + #[doc(hidden)] pub mod utils; diff --git a/provider/src/tzif.rs b/provider/src/tzif.rs index 19898b785..e6236695c 100644 --- a/provider/src/tzif.rs +++ b/provider/src/tzif.rs @@ -31,8 +31,14 @@ use std::path::Path; #[cfg(target_family = "unix")] use std::path::PathBuf; -use crate::provider::EpochNanosecondsAndOffset; +use crate::common::{offset_range, Mwd, MwdForTime, TransitionType}; use crate::CompiledNormalizer; +use crate::{ + common::{ + DstTransitionInfoForYear, LocalTimeRecordResult, TimeZoneTransitionInfo, TransitionKind, + }, + provider::EpochNanosecondsAndOffset, +}; use alloc::collections::BTreeMap; use alloc::string::String; use alloc::vec::Vec; @@ -46,7 +52,7 @@ use combine::Parser; use tzif::{ self, data::{ - posix::{DstTransitionInfo, PosixTzString, TransitionDate, TransitionDay}, + posix::{PosixTzString, TransitionDate, TransitionDay}, time::Seconds, tzif::{DataBlock, LocalTimeTypeRecord, TzifData, TzifHeader}, }, @@ -66,54 +72,6 @@ use crate::{ #[cfg(target_family = "unix")] const ZONEINFO_DIR: &str = "/usr/share/zoneinfo/"; -// TODO: Workshop record name? -/// The `LocalTimeRecord` result represents the result of searching for a -/// time zone transition without the offset seconds applied to the -/// epoch seconds. -/// -/// As a result of the search, it is possible for the resulting search to be either -/// Empty (due to an invalid time being provided that would be in the +1 tz shift) -/// or two time zones (when a time exists in the ambiguous range of a -1 shift). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LocalTimeRecordResult { - Empty(GapEntryOffsets), - Single(UtcOffsetSeconds), - Ambiguous { - first: UtcOffsetSeconds, - second: UtcOffsetSeconds, - }, -} - -/// `TimeZoneTransitionInfo` represents information about a timezone transition. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct TimeZoneTransitionInfo { - /// The transition time epoch at which the offset needs to be applied. - pub transition_epoch: Option, - /// The time zone offset in seconds. - pub offset: UtcOffsetSeconds, -} - -impl From for LocalTimeRecordResult { - fn from(value: UtcOffsetSeconds) -> Self { - Self::Single(value) - } -} - -impl From for LocalTimeRecordResult { - fn from(value: LocalTimeTypeRecord) -> Self { - Self::Single(value.into()) - } -} - -impl From<(LocalTimeTypeRecord, LocalTimeTypeRecord)> for LocalTimeRecordResult { - fn from(value: (LocalTimeTypeRecord, LocalTimeTypeRecord)) -> Self { - Self::Ambiguous { - first: value.0.into(), - second: value.1.into(), - } - } -} - /// `TZif` stands for Time zone information format is laid out by [RFC 8536][rfc8536] and /// laid out by the [tzdata manual][tzif-manual] /// @@ -428,12 +386,12 @@ impl Tzif { // before the unix epoch. let seconds_is_inexact_for_negative = seconds_is_negative && !seconds_is_exact; // We're before the first transition - if epoch_seconds < range.start - || (epoch_seconds == range.start && seconds_is_inexact_for_negative) + if epoch_seconds.0 < range.start + || (epoch_seconds.0 == range.start && seconds_is_inexact_for_negative) { range.start - } else if epoch_seconds < range.end - || (epoch_seconds == range.end && seconds_is_inexact_for_negative) + } else if epoch_seconds.0 < range.end + || (epoch_seconds.0 == range.end && seconds_is_inexact_for_negative) { // We're between the first and second transition range.end @@ -451,12 +409,12 @@ impl Tzif { let seconds_is_ineexact_for_positive = !seconds_is_negative && !seconds_is_exact; // We're after the second transition // (note that seconds_is_exact means that epoch_seconds == range.end actually means equality) - if epoch_seconds > range.end - || (epoch_seconds == range.end && seconds_is_ineexact_for_positive) + if epoch_seconds.0 > range.end + || (epoch_seconds.0 == range.end && 
seconds_is_ineexact_for_positive) { range.end - } else if epoch_seconds > range.start - || (epoch_seconds == range.start && seconds_is_ineexact_for_positive) + } else if epoch_seconds.0 > range.start + || (epoch_seconds.0 == range.start && seconds_is_ineexact_for_positive) { // We're after the first transition range.start @@ -472,16 +430,16 @@ impl Tzif { if let Some(last_tzif_transition) = last_tzif_transition { // When going Previous, we went back into the area of tzif transition - if seconds < last_tzif_transition { + if seconds < last_tzif_transition.0 { if let Some(last_real_tzif_transition) = last_real_tzif_transition() { - seconds = last_real_tzif_transition; + seconds = last_real_tzif_transition.0; } else { return Ok(None); } } } - Ok(Some(seconds.into())) + Ok(Some(EpochNanoseconds::from_seconds(seconds))) } // For more information, see /docs/TZDB.md @@ -774,61 +732,6 @@ impl TzifTransitionInfo { } } -#[derive(Debug)] -enum TransitionKind { - // The offsets didn't change (happens when abbreviations/savings values change) - Smooth, - // The offsets changed in a way that leaves a gap - Gap, - // The offsets changed in a way that produces overlapping time. 
- Overlap, -} - -/// Stores the information about DST transitions for a given year -struct DstTransitionInfoForYear { - dst_start_seconds: Seconds, - dst_end_seconds: Seconds, - std_offset: UtcOffsetSeconds, - dst_offset: UtcOffsetSeconds, -} - -impl DstTransitionInfoForYear { - fn compute( - posix_tz_string: &PosixTzString, - dst_variant: &DstTransitionInfo, - year: i32, - ) -> Self { - let std_offset = UtcOffsetSeconds::from(&posix_tz_string.std_info); - let dst_offset = UtcOffsetSeconds::from(&dst_variant.variant_info); - let dst_start_seconds = Seconds(calculate_transition_seconds_for_year( - year, - dst_variant.start_date, - std_offset, - )); - let dst_end_seconds = Seconds(calculate_transition_seconds_for_year( - year, - dst_variant.end_date, - dst_offset, - )); - Self { - dst_start_seconds, - dst_end_seconds, - std_offset, - dst_offset, - } - } - - // Returns the range between offsets in this year - // This may cover DST or standard time, whichever starts first - pub fn transition_range(&self) -> Range { - if self.dst_start_seconds > self.dst_end_seconds { - self.dst_end_seconds..self.dst_start_seconds - } else { - self.dst_start_seconds..self.dst_end_seconds - } - } -} - // NOTE: seconds here are epoch, so they are exact, not wall time. #[inline] fn resolve_posix_tz_string_for_epoch_seconds( @@ -846,8 +749,8 @@ fn resolve_posix_tz_string_for_epoch_seconds( let year = utils::epoch_time_to_iso_year(seconds * 1000); let transition_info = DstTransitionInfoForYear::compute(posix_tz_string, dst_variant, year); - let dst_start_seconds = transition_info.dst_start_seconds.0; - let dst_end_seconds = transition_info.dst_end_seconds.0; + let dst_start_seconds = transition_info.dst_start_seconds; + let dst_end_seconds = transition_info.dst_end_seconds; // Need to determine if the range being tested is standard or savings time. 
let dst_is_inversed = dst_end_seconds < dst_start_seconds; @@ -1050,119 +953,6 @@ fn resolve_posix_tz_string( } } -/// The month, week of month, and day of week value built into the POSIX tz string. -/// -/// For more information, see the [POSIX tz string docs](https://sourceware.org/glibc/manual/2.40/html_node/Proleptic-TZ.html) -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -struct Mwd { - month: u8, - week: u8, - day: u8, -} - -impl Mwd { - fn from_u16(month: u16, week: u16, day: u16) -> Self { - Self::from_u8( - u8::try_from(month).unwrap_or(0), - u8::try_from(week).unwrap_or(0), - u8::try_from(day).unwrap_or(0), - ) - } - - fn from_u8(month: u8, week: u8, day: u8) -> Self { - Self { month, week, day } - } - - /// Given the day of the week of the 0th day in this month, - /// normalize the week to being a week number (1 = first week, ...) - /// rather than a weekday ordinal (1 = first friday, etc) - fn normalize_to_week_number(&mut self, day_of_week_zeroth_day: u8) { - if self.day <= day_of_week_zeroth_day { - self.week += 1; - } - } -} - -/// Represents an MWD for a given time -#[derive(Debug)] -struct MwdForTime { - /// This will never have day = 5 - mwd: Mwd, - /// The day of the week of the 0th day (the day before the month starts) - day_of_week_zeroth_day: u8, - /// This is the day of week of the 29th and the last day of the month, - /// if the month has more than 28 days. 
- /// Basically, this is the start and end of the "fifth $weekday of the month" period - extra_days: Option<(u8, u8)>, -} - -impl MwdForTime { - fn from_seconds(seconds: i64) -> Self { - let (year, month, day_of_month) = utils::ymd_from_epoch_milliseconds(seconds * 1_000); - let week_of_month = day_of_month / 7 + 1; - let day_of_week = utils::epoch_seconds_to_day_of_week(seconds); - let mut mwd = Mwd::from_u8(month, week_of_month, day_of_week); - let days_in_month = utils::iso_days_in_month(year, month); - let day_of_week_zeroth_day = - (i16::from(day_of_week) - i16::from(day_of_month)).rem_euclid(7) as u8; - mwd.normalize_to_week_number(day_of_week_zeroth_day); - if day_of_month > 28 { - let day_of_week_day_29 = (day_of_week_zeroth_day + 29).rem_euclid(7); - let day_of_week_last_day = (day_of_week_zeroth_day + days_in_month).rem_euclid(7); - Self { - mwd, - day_of_week_zeroth_day, - extra_days: Some((day_of_week_day_29, day_of_week_last_day)), - } - } else { - // No day 5 - Self { - mwd, - day_of_week_zeroth_day, - extra_days: None, - } - } - } - - /// MWDs from Posix data can contain `w=5`, which means the *last* $weekday of the month, - /// not the 5th. For MWDs in the same month, this normalizes the 5 to the actual number of the - /// last weekday of the month (5 or 4) - /// - /// Furthermore, this turns the week number into a true week number: the "second friday in March" - /// will be turned into "the friday in the first week of March" or "the Friday in the second week of March" - /// depending on when March starts. - /// - /// This normalization *only* applies to MWDs in the same month. For other MWDs, such normalization is irrelevant. 
- fn normalize_mwd(&self, other: &mut Mwd) { - // If we're in the same month, normalization will actually have a useful effect - if self.mwd.month == other.month { - // First normalize MWDs that are like "the last $weekday in the month" - // the last $weekday in the month, we need special handling - if other.week == 5 { - if let Some((day_29, last_day)) = self.extra_days { - if day_29 < last_day { - if other.day < day_29 || other.day > last_day { - // This day isn't found in the last week. Subtract one. - other.week = 4; - } - } else { - // The extra part of the month crosses Sunday - if other.day < day_29 && other.day > last_day { - // This day isn't found in the last week. Subtract one. - other.week = 4; - } - } - } else { - // There is no week 5 in this month, normalize to 4 - other.week = 4; - } - } - - other.normalize_to_week_number(self.day_of_week_zeroth_day); - } - } -} - fn cmp_seconds_to_transitions( start: &TransitionDay, end: &TransitionDay, @@ -1227,28 +1017,6 @@ fn cmp_seconds_to_transitions( } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum TransitionType { - Dst, - Std, -} - -impl TransitionType { - fn invert(&mut self) { - *self = match *self { - Self::Dst => Self::Std, - Self::Std => Self::Dst, - } - } -} - -fn offset_range(offset_one: i64, offset_two: i64) -> core::ops::Range { - if offset_one < offset_two { - return offset_one..offset_two; - } - offset_two..offset_one -} - /// Timezone provider that uses compiled data. 
/// /// This provider includes raw tzdata in the application binary and parses that data into diff --git a/provider/src/utils.rs b/provider/src/utils.rs index 7874c5005..ce254788a 100644 --- a/provider/src/utils.rs +++ b/provider/src/utils.rs @@ -23,12 +23,12 @@ pub fn epoch_days_to_epoch_ms(day: i64, time: i64) -> i64 { /// `EpochTimeToDayNumber` /// /// This equation is the equivalent to `ECMAScript`'s `Date(t)` -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_time_to_day_number(t: i64) -> i32 { t.div_euclid(MS_PER_DAY as i64) as i32 } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_ms_to_ms_in_day(t: i64) -> u32 { (t.rem_euclid(i64::from(MS_PER_DAY))) as u32 } @@ -48,7 +48,7 @@ pub(crate) fn is_leap(y: i32) -> bool { } } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_time_to_iso_year(t: i64) -> i32 { let epoch_days = epoch_ms_to_epoch_days(t); let (rata_die, shift_constant) = neri_schneider::rata_die_for_epoch_days(epoch_days); @@ -56,7 +56,7 @@ pub(crate) fn epoch_time_to_iso_year(t: i64) -> i32 { } /// Returns the epoch day number for a given year. 
-#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_days_for_year(y: i32) -> i32 { 365 * (y - 1970) + (y - 1969).div_euclid(4) - (y - 1901).div_euclid(100) + (y - 1601).div_euclid(400) @@ -71,7 +71,7 @@ pub fn ymd_from_epoch_milliseconds(epoch_milliseconds: i64) -> (i32, u8, u8) { neri_schneider::ymd_from_epoch_days(epoch_days) } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn month_to_day(m: u8, is_leap: bool) -> u16 { let leap_day = u16::from(is_leap); match m { @@ -91,12 +91,12 @@ pub(crate) fn month_to_day(m: u8, is_leap: bool) -> u16 { } } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_time_to_day_in_year(t: i64) -> i32 { epoch_time_to_day_number(t) - (epoch_days_for_year(epoch_time_to_iso_year(t))) } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub(crate) fn epoch_seconds_to_day_of_week(t: i64) -> u8 { ((t / 86_400) + 4).rem_euclid(7) as u8 } diff --git a/provider/src/utils/neri_schneider.rs b/provider/src/utils/neri_schneider.rs index 95fbb32b9..620c1784d 100644 --- a/provider/src/utils/neri_schneider.rs +++ b/provider/src/utils/neri_schneider.rs @@ -41,7 +41,7 @@ pub const EPOCH_COMPUTATIONAL_RATA_DIE: i32 = 719_468; pub const DAYS_IN_A_400Y_CYCLE: u32 = 146_097; -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] const TWO_POWER_THIRTY_NINE: u64 = 549_755_813_888; // 2^39 constant const TWO_POWER_THIRTY_TWO: u64 = 4_294_967_296; // 2^32 constant const TWO_POWER_SIXTEEN: u32 = 65_536; // 2^16 constant @@ -79,7 +79,7 @@ const fn rata_die_first_equations(year: i32, month: u8, day: u8) -> (u64, i64, i // Computational days to gregorian YMD // Determine j -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] const fn j(rata_die: u32) -> u32 { (computational_day_of_year(rata_die) >= 
306) as u32 } @@ -88,7 +88,7 @@ const fn n_one(rata_die: u32) -> u32 { 4 * rata_die + 3 } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] const fn n_two(rata_die: u32) -> u32 { century_rem(rata_die) | 3 } @@ -101,12 +101,12 @@ const fn first_equations(rata_die: u32) -> (u32, u32) { (century_num, century_rem) } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] const fn century_rem(rata_die: u32) -> u32 { n_one(rata_die).rem_euclid(DAYS_IN_A_400Y_CYCLE) } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub const fn century_number(rata_die: u32) -> u32 { n_one(rata_die).div_euclid(DAYS_IN_A_400Y_CYCLE) } @@ -135,24 +135,24 @@ const fn third_equations(rata_die: u32) -> (u32, u32, u32, u32) { } // Z -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub const fn computational_year_of_century(rata_die: u32) -> u64 { (376_287_347 * n_two(rata_die) as u64).div_euclid(TWO_POWER_THIRTY_NINE) } // N_y -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub const fn computational_day_of_year(rata_die: u32) -> u32 { (n_two(rata_die) - 1461 * computational_year_of_century(rata_die) as u32).div_euclid(4) } // Y -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub const fn computational_year(rata_die: u32) -> u32 { 100 * century_number(rata_die) + computational_year_of_century(rata_die) as u32 } -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] pub const fn year(computational_rata_die: u32, shift_constant: i32) -> i32 { (computational_year(computational_rata_die) + j(computational_rata_die)) as i32 - shift_constant } @@ -186,7 +186,7 @@ pub const fn ymd_from_epoch_days(epoch_days: i32) -> (i32, u8, u8) { } #[cfg(test)] -#[cfg(feature = "tzif")] +#[cfg(any(feature = "tzif", feature = "experimental_tzif"))] mod tests { 
use super::*; diff --git a/src/builtins/core/zoned_date_time/tests.rs b/src/builtins/core/zoned_date_time/tests.rs index 26cb8b7a7..e159e7828 100644 --- a/src/builtins/core/zoned_date_time/tests.rs +++ b/src/builtins/core/zoned_date_time/tests.rs @@ -41,6 +41,14 @@ macro_rules! test_all_providers { $b } + + $(#[cfg($cfg_fs)])? { + std::println!("Testing ZeroCompiledZoneInfoProvider:"); + let fs = timezone_provider::experimental_tzif::ZeroCompiledTzdbProvider::default(); + let $provider = &fs; + + $b + } }}; }