Skip to content

Commit

Permalink
Move MeasureUnit to separate module and Implement UnitsTrieV1 Dat…
Browse files Browse the repository at this point in the history
…a Provider (unicode-org#5348)
  • Loading branch information
younies authored Aug 22, 2024
1 parent 55759ac commit bb91fb3
Show file tree
Hide file tree
Showing 31 changed files with 604 additions and 103 deletions.
2 changes: 2 additions & 0 deletions components/experimental/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ pub mod provider {
impl_short_week_relative_time_format_data_v1_marker!(Baked);
impl_short_year_relative_time_format_data_v1_marker!(Baked);
impl_units_info_v1_marker!(Baked);
impl_units_trie_v1_marker!(Baked);
};

#[cfg(feature = "datagen")]
Expand All @@ -111,6 +112,7 @@ pub mod provider {
super::displaynames::provider::RegionDisplayNamesV1Marker::INFO,
super::displaynames::provider::ScriptDisplayNamesV1Marker::INFO,
super::displaynames::provider::VariantDisplayNamesV1Marker::INFO,
super::measure::provider::trie::UnitsTrieV1Marker::INFO,
super::personnames::provider::PersonNamesFormatV1Marker::INFO,
super::relativetime::provider::LongDayRelativeTimeFormatDataV1Marker::INFO,
super::relativetime::provider::LongHourRelativeTimeFormatDataV1Marker::INFO,
Expand Down
19 changes: 19 additions & 0 deletions components/experimental/src/measure/measureunit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use smallvec::SmallVec;

use super::provider::single_unit::SingleUnit;

// TODO NOTE: the `MeasureUnitParser` takes the trie, and the `ConverterFactory` takes the full payload and an instance of `MeasureUnitParser`.
/// A processed CLDR unit, stored as a list of [`SingleUnit`]s.
///
/// For example, the CLDR unit identifier `meter-per-second`.
///
/// NOTE:
///   - To construct a `MeasureUnit` from a CLDR unit identifier, use the `MeasureUnitParser`.
#[derive(Debug)]
pub struct MeasureUnit {
// TODO: make this field private and add functions to use it.
/// The processed single units that make up this unit.
/// Up to 8 entries are stored inline before the `SmallVec` spills to the heap.
pub contained_units: SmallVec<[SingleUnit; 8]>,
}
4 changes: 4 additions & 0 deletions components/experimental/src/measure/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@

pub(crate) mod power;
pub(crate) mod si_prefix;

pub mod measureunit;
pub mod parser;
pub mod provider;
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use alloc::vec::Vec;
use smallvec::SmallVec;
use zerotrie::ZeroTrieSimpleAscii;

use crate::measure::measureunit::MeasureUnit;
use crate::measure::power::get_power;
use crate::measure::si_prefix::get_si_prefix;
use crate::units::InvalidUnitError;

use crate::units::{
provider::{Base, MeasureUnitItem, SiPrefix},
InvalidUnitError,
};
use super::provider::si_prefix::{Base, SiPrefix};
use super::provider::single_unit::SingleUnit;

// TODO: add test cases for this parser after adding UnitsTest.txt to the test data.
/// A parser for the CLDR unit identifier (e.g. `meter-per-square-second`)
Expand Down Expand Up @@ -97,7 +96,7 @@ impl<'data> MeasureUnitParser<'data> {
return Err(InvalidUnitError);
}

let mut measure_unit_items = Vec::<MeasureUnitItem>::new();
let mut measure_unit_items = Vec::<SingleUnit>::new();
let mut sign = 1;
while !code_units.is_empty() {
// First: extract the power.
Expand Down Expand Up @@ -138,7 +137,7 @@ impl<'data> MeasureUnitParser<'data> {
}
};

measure_unit_items.push(MeasureUnitItem {
measure_unit_items.push(SingleUnit {
power: sign * power as i8,
si_prefix,
unit_id,
Expand All @@ -158,10 +157,3 @@ impl<'data> MeasureUnitParser<'data> {
})
}
}

// TODO NOTE: the MeasureUnitParser takes the trie and the ConverterFactory takes the full payload and an instance of MeasureUnitParser.
#[derive(Debug)]
pub struct MeasureUnit {
/// Contains the processed units.
pub contained_units: SmallVec<[MeasureUnitItem; 8]>,
}
7 changes: 7 additions & 0 deletions components/experimental/src/measure/provider/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

pub mod si_prefix;
pub mod single_unit;
pub mod trie;
47 changes: 47 additions & 0 deletions components/experimental/src/measure/provider/si_prefix.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// Provider structs must be stable
#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]

//! Data provider struct definitions for this ICU4X component.
//!
//! Read more about data providers: [`icu_provider`]
/// Represents the base of an SI prefix.
///
/// The discriminants are fixed by `#[repr(u8)]`; [`Base::Decimal`] is the default value.
#[zerovec::make_ule(BaseULE)]
// The databake path must name the module in which this type is actually defined
// (`measure::provider::si_prefix`), so that baked code refers to a resolvable path.
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_experimental::measure::provider::si_prefix),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum Base {
    /// The base of the SI prefix is 10.
    #[default]
    Decimal = 0,

    /// The base of the SI prefix is 2.
    Binary = 1,
}

// TODO: Consider reducing the size of this struct while implementing the ULE.
/// Represents an SI prefix as a power together with the [`Base`] that the power applies to.
#[zerovec::make_ule(SiPrefixULE)]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
// The databake path must name the module in which this type is actually defined
// (`measure::provider::si_prefix`), so that baked code refers to a resolvable path.
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_experimental::measure::provider::si_prefix),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct SiPrefix {
    /// The power of the SI prefix.
    ///
    /// NOTE(review): this was previously documented as "the absolute value of the power",
    /// but the field is a signed `i8`; confirm whether negative powers are stored here.
    pub power: i8,

    /// The base of the SI prefix.
    pub base: Base,
}
28 changes: 28 additions & 0 deletions components/experimental/src/measure/provider/single_unit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::si_prefix::SiPrefix;

/// Represents a single unit in a measure unit.
///
/// For example, the MeasureUnit `kilometer-per-square-second` contains two single units:
///    1. `kilometer` with power 1 and prefix 3 with base 10.
///    2. `second` with power -2 and prefix power equal to 0.
#[zerovec::make_ule(SingleUnitULE)]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
// The databake path must name the module in which this type is actually defined
// (`measure::provider::single_unit`), so that baked code refers to a resolvable path.
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_experimental::measure::provider::single_unit),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct SingleUnit {
    /// The power of the unit.
    pub power: i8,

    /// The SI prefix applied to the unit.
    pub si_prefix: SiPrefix,

    /// The id of the unit.
    pub unit_id: u16,
}
46 changes: 46 additions & 0 deletions components/experimental/src/measure/provider/trie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// Provider structs must be stable
#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]

//! Data provider struct definitions for this ICU4X component.
//!
//! Read more about data providers: [`icu_provider`]
use icu_provider::prelude::*;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::ZeroVec;

#[cfg(feature = "compiled_data")]
/// Baked data
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub use crate::provider::Baked;

/// This type contains the trie that maps unit names to unit ids.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(marker(UnitsTrieV1Marker, "units/trie@1", singleton))]
#[derive(Clone, PartialEq, Debug)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_experimental::measure::provider::trie),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct UnitsTrieV1<'data> {
// TODO: remove this field from units/provider::UnitsInfoV1 once the `MeasureUnit` is fully used in the measurement units.
/// Maps from a unit name (e.g. `foot` or `meter`) to its unit id.
/// This id can be used to retrieve the conversion information from the `UnitsInfoV1`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,
}
2 changes: 1 addition & 1 deletion components/experimental/src/measure/si_prefix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use zerotrie::ZeroTrieSimpleAscii;

use crate::units::provider::{Base, SiPrefix};
use super::provider::si_prefix::{Base, SiPrefix};

/// The offset of the SI prefixes.
/// NOTE:
Expand Down
11 changes: 6 additions & 5 deletions components/experimental/src/units/converter_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::measure::measureunit::MeasureUnit;
use crate::measure::parser::MeasureUnitParser;
use crate::measure::provider::single_unit::SingleUnit;
use crate::units::provider;
use crate::units::provider::MeasureUnitItem;
use crate::units::ratio::IcuRatio;
use crate::units::{
converter::{
OffsetConverter, ProportionalConverter, ReciprocalConverter, UnitsConverter,
UnitsConverterInner,
},
measureunit::{MeasureUnit, MeasureUnitParser},
provider::Sign,
};

Expand Down Expand Up @@ -175,7 +176,7 @@ impl ConverterFactory {
/// For example, `newton` has the basic units: `gram`, `meter`, and `second` (each one has its own power and SI prefix).
fn insert_non_basic_units(
factory: &ConverterFactory,
units: &[MeasureUnitItem],
units: &[SingleUnit],
sign: i16,
map: &mut LiteMap<u16, PowersInfo>,
) -> Option<()> {
Expand All @@ -200,7 +201,7 @@ impl ConverterFactory {
/// For example, `square-foot` , the base unit is `meter` with power 1.
/// Thus, the inserted power should be `1 * 2 = 2`.
fn insert_base_units(
basic_units: &ZeroSlice<MeasureUnitItem>,
basic_units: &ZeroSlice<SingleUnit>,
original_power: i16,
sign: i16,
map: &mut LiteMap<u16, PowersInfo>,
Expand Down Expand Up @@ -248,7 +249,7 @@ impl ConverterFactory {
}
}

fn compute_conversion_term(&self, unit_item: &MeasureUnitItem, sign: i8) -> Option<IcuRatio> {
fn compute_conversion_term(&self, unit_item: &SingleUnit, sign: i8) -> Option<IcuRatio> {
let conversion_info = self
.payload
.get()
Expand Down
1 change: 0 additions & 1 deletion components/experimental/src/units/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use displaydoc::Display;
pub mod converter;
pub mod converter_factory;
pub mod convertible;
pub mod measureunit;
pub mod provider;
pub mod ratio;

Expand Down
63 changes: 3 additions & 60 deletions components/experimental/src/units/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use num_bigint::BigInt;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::{ule::AsULE, VarZeroVec, ZeroVec};

use crate::measure::provider::single_unit::SingleUnit;
#[cfg(feature = "compiled_data")]
/// Baked data
///
Expand Down Expand Up @@ -42,6 +43,7 @@ use super::ratio::IcuRatio;
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct UnitsInfoV1<'data> {
// TODO: remove this field once we are using this map from `measure/provider::UnitsTrie`.
/// Maps from unit name (e.g. foot) to its conversion information.
#[cfg_attr(feature = "serde", serde(borrow))]
pub units_conversion_trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,
Expand Down Expand Up @@ -75,7 +77,7 @@ pub struct UnitsInfoV1<'data> {
pub struct ConversionInfo<'data> {
/// Contains the base units (after parsing) that the unit is converted to.
#[cfg_attr(feature = "serde", serde(borrow))]
pub basic_units: ZeroVec<'data, MeasureUnitItem>,
pub basic_units: ZeroVec<'data, SingleUnit>,

/// Represents the numerator of the conversion factor.
#[cfg_attr(feature = "serde", serde(borrow))]
Expand Down Expand Up @@ -137,65 +139,6 @@ pub enum Exactness {
Approximate = 1,
}

#[zerovec::make_ule(BaseULE)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_experimental::units::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[repr(u8)]
pub enum Base {
/// The base of the si prefix is 10.
#[default]
Decimal = 0,

/// The base of the si prefix is 2.
Binary = 1,
}

/// Represents an Item of a MeasureUnit.
/// For example, the MeasureUnit `kilometer-per-square-second` contains two items:
/// 1. `kilometer` with power 1 and prefix 3 with base 10.
/// 2. `second` with power -2 and prefix `NotExist`.
#[zerovec::make_ule(MeasureUnitItemULE)]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_experimental::units::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct MeasureUnitItem {
/// The power of the unit.
pub power: i8,

/// The si base of the unit.
pub si_prefix: SiPrefix,

/// The id of the unit.
pub unit_id: u16,
}

// TODO: Consider reducing the size of this struct while implementing the ULE.
/// Represents the SI prefix.
#[zerovec::make_ule(SiPrefixULE)]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_experimental::units::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct SiPrefix {
/// The absolute value of the power of the si prefix.
pub power: i8,

/// The base of the si prefix.
pub base: Base,
}

impl ConversionInfoULE {
/// Extracts the conversion factor as [`super::ratio::IcuRatio`].
pub(crate) fn factor_as_ratio(&self) -> IcuRatio {
Expand Down
2 changes: 1 addition & 1 deletion components/experimental/src/units/ratio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use num_rational::Ratio;
use num_traits::Signed;
use num_traits::{One, Pow, Zero};

use super::provider::{Base, SiPrefix};
use crate::measure::provider::si_prefix::{Base, SiPrefix};

// TODO: add test cases for IcuRatio.
// TODO: Make a decision on whether to keep the `IcuRatio` public or not.
Expand Down
2 changes: 1 addition & 1 deletion ffi/capi/bindings/dart/MeasureUnit.g.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bb91fb3

Please sign in to comment.