Skip to content

Commit

Permalink
Remove some panics from zerovec's derive and icu_codepointtrie (#6052)
Browse files Browse the repository at this point in the history
While working on bionic, I noticed panic machinery being
pulled in.
  • Loading branch information
Manishearth authored Feb 2, 2025
1 parent 5382dcf commit 3c7d35e
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 21 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
- `LocaleExpander`, `LocaleDirectionality`, and `LocaleCanonicalizer` distinguish between `new_common()` and `new_extended()` constructors (unicode-org#5958)
- `icu_segmenter`
- Segmenters that can take a content locale now specify `_root()` on their default localeless constructors (unicode-org#5958)

- Utils
- `zerovec`
- derive: Reduce number of panicky calls introduced by derive (unicode-org#6052)
## icu4x 2.0-beta1

- Components
Expand Down
66 changes: 57 additions & 9 deletions components/collections/src/codepointtrie/cptrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,46 @@ impl TryFrom<u8> for TrieType {
}
}

// Helper macro that turns arithmetic into wrapping-in-release, checked-in-debug arithmetic
//
// This is rustc's default behavior anyway, however some projects like Android deliberately
// enable overflow checks. CodePointTrie::get() is intended to be used in Android bionic which
// cares about codesize and we don't want the pile of panicking infrastructure brought in by overflow
// checks, so we force wrapping in release.
// See #6052
//
// The `debug_assert!` in each arm means overflow is still caught in debug builds
// (checked_* returns None on overflow), while release builds get the plain
// wrapping_* operation with no panic path.
macro_rules! w(
    // Note: these matchers are not perfect since you cannot have an operator after an expr matcher
    // Use variables if you need complex first operands.
    //
    // Also note that `$b:expr` parses greedily to the end of the input, so e.g.
    // `w!(a + b & m)` expands to `a.wrapping_add(b & m)` — the trailing operator
    // becomes part of the second operand, NOT applied to the wrapped sum.
    ($a:tt + $b:expr) => {
        {
            // Bind operands once so the debug check and the wrapping op agree,
            // and so side effects in `$a`/`$b` are not duplicated by the assert.
            #[allow(unused_parens)]
            let a = $a;
            let b = $b;
            debug_assert!(a.checked_add(b).is_some());
            $a.wrapping_add($b)
        }
    };
    ($a:tt - $b:expr) => {

        {
            #[allow(unused_parens)]
            let a = $a;
            let b = $b;
            debug_assert!(a.checked_sub(b).is_some());
            $a.wrapping_sub($b)
        }
    };
    ($a:tt * $b:expr) => {
        {
            #[allow(unused_parens)]
            let a = $a;
            let b = $b;
            debug_assert!(a.checked_mul(b).is_some());
            $a.wrapping_mul($b)
        }
    };
);

impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
#[doc(hidden)] // databake internal
pub const fn from_parts(
Expand Down Expand Up @@ -236,26 +276,34 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
/// error value.
#[inline(always)] // `always` based on normalizer benchmarking
fn trie_error_val_index(&self) -> u32 {
    // Index into `data` of the trie's error value, stored at a fixed negative
    // offset from the end of the data array.
    //
    // We use wrapping subtraction (via `w!`) here to avoid panicky overflow checks.
    // `data.len()` should always be >= ERROR_VALUE_NEG_DATA_OFFSET, but if it isn't
    // this will just cause GIGO behavior of producing None on `.get()`, since the
    // resulting (huge) index fails the checked lookup.
    debug_assert!(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET);
    w!((self.data.len() as u32) - ERROR_VALUE_NEG_DATA_OFFSET)
}

fn internal_small_index(&self, code_point: u32) -> u32 {
// We use wrapping arithmetic here to avoid overflow checks making their way into binaries
// with overflow checks enabled. Ultimately this code ends up as a checked index, so any
// bugs here will cause GIGO
let mut index1_pos: u32 = code_point >> SHIFT_1;
if self.header.trie_type == TrieType::Fast {
debug_assert!(
FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self.header.high_start
);
index1_pos = index1_pos + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
index1_pos = w!(index1_pos + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH);
} else {
assert!(code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT);
index1_pos += SMALL_INDEX_LENGTH;
index1_pos = w!(index1_pos + SMALL_INDEX_LENGTH);
}
let index1_val = if let Some(index1_val) = self.index.get(index1_pos as usize) {
index1_val
} else {
return self.trie_error_val_index();
};
let index3_block_idx: u32 = (index1_val as u32) + ((code_point >> SHIFT_2) & INDEX_2_MASK);
let index3_block_idx: u32 =
w!((index1_val as u32) + (code_point >> SHIFT_2) & INDEX_2_MASK);
let mut index3_block: u32 =
if let Some(index3_block) = self.index.get(index3_block_idx as usize) {
index3_block as u32
Expand All @@ -267,32 +315,32 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
if index3_block & 0x8000 == 0 {
// 16-bit indexes
data_block =
if let Some(data_block) = self.index.get((index3_block + index3_pos) as usize) {
if let Some(data_block) = self.index.get(w!(index3_block + index3_pos) as usize) {
data_block as u32
} else {
return self.trie_error_val_index();
};
} else {
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
index3_block = (index3_block & 0x7fff) + (index3_pos & !7) + (index3_pos >> 3);
index3_block = w!((index3_block & 0x7fff) + w!((index3_pos & !7) + index3_pos >> 3));
index3_pos &= 7;
data_block = if let Some(data_block) = self.index.get(index3_block as usize) {
data_block as u32
} else {
return self.trie_error_val_index();
};
data_block = (data_block << (2 + (2 * index3_pos))) & 0x30000;
data_block = (data_block << w!(2u32 + w!(2u32 * index3_pos))) & 0x30000;
index3_block += 1;
data_block =
if let Some(index3_val) = self.index.get((index3_block + index3_pos) as usize) {
if let Some(index3_val) = self.index.get(w!(index3_block + index3_pos) as usize) {
data_block | (index3_val as u32)
} else {
return self.trie_error_val_index();
};
}
// Returns data_pos == data_block (offset) +
// portion of code_point bit field for last (4th) lookup
data_block + (code_point & SMALL_DATA_MASK)
w!(data_block + code_point & SMALL_DATA_MASK)
}

/// Returns the position in the `data` array for the given code point,
Expand Down
9 changes: 7 additions & 2 deletions utils/zerovec/derive/src/make_varule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,9 @@ fn make_encode_impl(
let ty = &field.field.ty;
let accessor = &field.accessor;
quote!(
#[allow(clippy::indexing_slicing)] // generate_per_field_offsets produces valid indices
// generate_per_field_offsets produces valid indices,
// and we don't care about panics in Encode impls
#[allow(clippy::indexing_slicing)]
let out = &mut dst[#prev_offset_ident .. #prev_offset_ident + #size_ident];
let unaligned = zerovec::ule::AsULE::to_unaligned(self.#accessor);
let unaligned_slice = &[unaligned];
Expand Down Expand Up @@ -443,7 +445,10 @@ fn make_encode_impl(
debug_assert_eq!(self.encode_var_ule_len(), dst.len());
#encoders

#[allow(clippy::indexing_slicing)] // generate_per_field_offsets produces valid remainder

// generate_per_field_offsets produces valid remainders,
// and we don't care about panics in Encode impls
#[allow(clippy::indexing_slicing)]
let out = &mut dst[#remaining_offset..];
#last_encode_write
}
Expand Down
7 changes: 5 additions & 2 deletions utils/zerovec/derive/src/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ pub(crate) fn generate_ule_validators(
utils::generate_per_field_offsets(fields, false, |field, prev_offset_ident, size_ident| {
let ty = &field.field.ty;
quote! {
#[allow(clippy::indexing_slicing)] // generate_per_field_offsets produces valid indices
<#ty as zerovec::ule::ULE>::validate_bytes(&bytes[#prev_offset_ident .. #prev_offset_ident + #size_ident])?;
if let Some(bytes) = bytes.get(#prev_offset_ident .. #prev_offset_ident + #size_ident) {
<#ty as zerovec::ule::ULE>::validate_bytes(bytes)?;
} else {
return Err(zerovec::ule::UleError::parse::<Self>());
}
}
})
}
Expand Down
13 changes: 6 additions & 7 deletions utils/zerovec/derive/src/varule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,22 +102,21 @@ pub fn derive_impl(
unsafe impl zerovec::ule::VarULE for #name {
#[inline]
fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
debug_assert_eq!(#remaining_offset, #ule_size);

if bytes.len() < #ule_size {
let Some(last_field_bytes) = bytes.get(#remaining_offset..) else {
return Err(zerovec::ule::UleError::parse::<Self>());
}
};
#validators
debug_assert_eq!(#remaining_offset, #ule_size);
#[allow(clippy::indexing_slicing)] // TODO explain
let last_field_bytes = &bytes[#remaining_offset..];
#last_field_validator
Ok(())
}
#[inline]
unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
// just the unsized part
#[allow(clippy::indexing_slicing)] // TODO explain
let unsized_bytes = &bytes[#ule_size..];
// Safety: The invariants of this function allow us to assume bytes is valid, and
// having at least #ule_size bytes is a validity constraint for the ULE type.
let unsized_bytes = bytes.get_unchecked(#ule_size..);
let unsized_ref = <#unsized_field as zerovec::ule::VarULE>::from_bytes_unchecked(unsized_bytes);
// We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513
// For now we rely on all DST metadata being a usize to extract it via a fake slice pointer
Expand Down

0 comments on commit 3c7d35e

Please sign in to comment.