Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SSE2/AVX2/WASM SIMD support #86

Merged
merged 13 commits into from
Mar 19, 2024
26 changes: 20 additions & 6 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ jobs:
strategy:
matrix:
rust: [1.56.0, stable, nightly]

features: ["+avx2", "+sse2", "-avx2,-sse2"]
# Pass the matrix entry to rustc so each job is compiled with the selected
# SIMD feature set. Cargo reads RUSTFLAGS, the codegen option is
# `target-feature` (singular), and GitHub only expands `${{ ... }}` expressions.
env:
  RUSTFLAGS: "-C target-feature=${{ matrix.features }}"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -43,7 +45,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -64,7 +66,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -85,7 +87,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -95,4 +97,16 @@ jobs:
- name: Run Clippy
run: |
cd benches
cargo bench --bench benches --no-run
cargo bench --bench benches --no-run

# Job: type-check the crate for the wasm32-unknown-unknown target.
build-wasm:
runs-on: ubuntu-latest
timeout-minutes: 30
# Declares a dependency on the `build` job defined earlier in this workflow.
needs: build
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
# Installs the wasm32 target alongside the stable toolchain.
target: wasm32-unknown-unknown
- name: Check wasm
# `cargo check` only type-checks; no wasm artifact is built or executed here.
run: cargo check --target wasm32-unknown-unknown
108 changes: 108 additions & 0 deletions src/block/avx2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::{
cmp::Ordering,
hash::{Hash, Hasher},
iter::Iterator,
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not},
};

/// A 256-bit block of bits backed by a single AVX2 vector (`__m256i`).
///
/// `#[repr(transparent)]` keeps the layout identical to `__m256i`, which the
/// conversions to and from `[usize; USIZE_COUNT]` rely on.
///
/// Note on `unsafe`: the parent module only compiles this file when
/// `target_feature = "avx2"` is enabled, which is what makes the intrinsic
/// calls below sound.
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m256i);

impl Block {
    /// Number of `usize` words that fit in one block.
    pub const USIZE_COUNT: usize = core::mem::size_of::<Block>() / core::mem::size_of::<usize>();
    /// Block with every bit cleared.
    pub const NONE: Self = Block::from_usize_array([0; Block::USIZE_COUNT]);
    /// Block with every bit set.
    pub const ALL: Self = Block::from_usize_array([usize::MAX; Block::USIZE_COUNT]);
    /// Number of bits held by one block.
    pub const BITS: usize = 8 * core::mem::size_of::<Block>();

    /// Reinterprets the block as an array of `usize` words.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        let Block(vector) = self;
        // SAFETY: source and destination have the same size (`USIZE_COUNT` is
        // derived from `size_of::<Self>()`), and both are plain integer data.
        unsafe { core::mem::transmute(vector) }
    }

    /// Builds a block from an array of `usize` words (usable in `const` context).
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        // SAFETY: same size on both sides; every bit pattern is a valid `__m256i`.
        let vector: __m256i = unsafe { core::mem::transmute(array) };
        Block(vector)
    }

    /// Returns `true` when no bit of the block is set.
    #[inline]
    pub fn is_empty(self) -> bool {
        // `testz(a, a)` yields 1 exactly when `a & a` (i.e. `a`) is all zeros.
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let zero_flag = unsafe { _mm256_testz_si256(self.0, self.0) };
        zero_flag == 1
    }

    /// Returns the bits of `self` that are not set in `other` (`self & !other`).
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        // The intrinsic negates its FIRST operand, hence `other` goes first.
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let remaining = unsafe { _mm256_andnot_si256(other.0, self.0) };
        Block(remaining)
    }
}

impl Not for Block {
    type Output = Block;

    /// Flips every bit by XOR-ing with the all-ones block.
    #[inline]
    fn not(self) -> Self::Output {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let flipped = unsafe { _mm256_xor_si256(self.0, Self::ALL.0) };
        Block(flipped)
    }
}

impl BitAnd for Block {
    type Output = Block;

    /// Bitwise intersection of two blocks.
    #[inline]
    fn bitand(self, other: Self) -> Self::Output {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let both = unsafe { _mm256_and_si256(self.0, other.0) };
        Block(both)
    }
}

impl BitAndAssign for Block {
    /// In-place bitwise intersection.
    #[inline]
    fn bitand_assign(&mut self, other: Self) {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let both = unsafe { _mm256_and_si256(self.0, other.0) };
        self.0 = both;
    }
}

impl BitOr for Block {
    type Output = Block;

    /// Bitwise union of two blocks.
    #[inline]
    fn bitor(self, other: Self) -> Self::Output {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let either = unsafe { _mm256_or_si256(self.0, other.0) };
        Block(either)
    }
}

impl BitOrAssign for Block {
    /// In-place bitwise union.
    #[inline]
    fn bitor_assign(&mut self, other: Self) {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let either = unsafe { _mm256_or_si256(self.0, other.0) };
        self.0 = either;
    }
}

impl BitXor for Block {
    type Output = Block;

    /// Bitwise symmetric difference of two blocks.
    #[inline]
    fn bitxor(self, other: Self) -> Self::Output {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let differing = unsafe { _mm256_xor_si256(self.0, other.0) };
        Block(differing)
    }
}

impl BitXorAssign for Block {
    /// In-place bitwise symmetric difference.
    #[inline]
    fn bitxor_assign(&mut self, other: Self) {
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let differing = unsafe { _mm256_xor_si256(self.0, other.0) };
        self.0 = differing;
    }
}

impl PartialEq for Block {
    /// Two blocks are equal when all 32 bytes match.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // `cmpeq_epi8` sets a byte to 0xFF where the inputs agree; `movemask`
        // gathers one bit per byte, so full equality gives all 32 bits set (-1).
        // SAFETY: AVX2 is enabled for this module (see the note on `Block`).
        let byte_mask = unsafe {
            let per_byte = _mm256_cmpeq_epi8(self.0, other.0);
            _mm256_movemask_epi8(per_byte)
        };
        byte_mask == !(0i32)
    }
}
76 changes: 76 additions & 0 deletions src/block/default.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use core::iter::Iterator;
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};

/// Scalar fallback block: one machine word of bits.
///
/// This backend mirrors the API of the SIMD backends (`USIZE_COUNT`, `NONE`,
/// `ALL`, `BITS`, the `usize`-array conversions, `is_empty`, `andnot` and the
/// bit operators) so that code shared between backends compiles against any of
/// them.
#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(transparent)]
pub struct Block(usize);

impl Block {
    /// Number of `usize` words stored in one block.
    pub const USIZE_COUNT: usize = 1;
    /// Block with every bit cleared.
    pub const NONE: Self = Block(0);
    /// Block with every bit set.
    pub const ALL: Self = Block(!0);
    /// Number of bits held by one block.
    pub const BITS: usize = core::mem::size_of::<Self>() * 8;

    /// Returns the block's contents as an array of `usize` words.
    ///
    /// The shared `Ord` and `Hash` impls in the parent module call this on
    /// whichever backend is selected, so it must exist here as well.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        [self.0]
    }

    /// Builds a block from an array of `usize` words (usable in `const` context).
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        Self(array[0])
    }

    /// Returns `true` when no bit of the block is set.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.0 == Self::NONE.0
    }

    /// Returns the bits of `self` that are not set in `other` (`self & !other`).
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        Self(!other.0 & self.0)
    }
}

impl Not for Block {
    type Output = Block;

    /// Flips every bit of the block.
    #[inline]
    fn not(self) -> Self::Output {
        Self(!self.0)
    }
}

impl BitAnd for Block {
    type Output = Block;

    /// Bitwise intersection of two blocks.
    #[inline]
    fn bitand(self, other: Self) -> Self::Output {
        Self(self.0 & other.0)
    }
}

impl BitAndAssign for Block {
    /// In-place bitwise intersection.
    #[inline]
    fn bitand_assign(&mut self, other: Self) {
        self.0 &= other.0;
    }
}

impl BitOr for Block {
    type Output = Block;

    /// Bitwise union of two blocks.
    #[inline]
    fn bitor(self, other: Self) -> Self::Output {
        Self(self.0 | other.0)
    }
}

impl BitOrAssign for Block {
    /// In-place bitwise union.
    #[inline]
    fn bitor_assign(&mut self, other: Self) {
        self.0 |= other.0;
    }
}

impl BitXor for Block {
    type Output = Block;

    /// Bitwise symmetric difference of two blocks.
    #[inline]
    fn bitxor(self, other: Self) -> Self::Output {
        Self(self.0 ^ other.0)
    }
}

impl BitXorAssign for Block {
    /// In-place bitwise symmetric difference.
    #[inline]
    fn bitxor_assign(&mut self, other: Self) {
        self.0 ^= other.0;
    }
}
76 changes: 76 additions & 0 deletions src/block/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use core::cmp::Ordering;
use core::hash::{Hash, Hasher};

// Backend selection: the cfg predicates below are mutually exclusive, so
// exactly one `Block` implementation is compiled in and re-exported.
//
//   wasm32 target              -> `wasm32`
//   non-wasm32, avx2 enabled   -> `avx2`
//   non-wasm32, sse2 only      -> `sse2`
//   non-wasm32, neither        -> `default`

// Fallback: not wasm32 and neither SSE2 nor AVX2 enabled at compile time.
#[cfg(all(
not(target_arch = "wasm32"),
not(target_feature = "sse2"),
not(target_feature = "avx2"),
))]
mod default;
#[cfg(all(
not(target_arch = "wasm32"),
not(target_feature = "sse2"),
not(target_feature = "avx2"),
))]
pub use self::default::*;

// SSE2 backend: chosen only when AVX2 is NOT also enabled, so the AVX2
// backend takes precedence when both features are on.
#[cfg(all(
not(target_arch = "wasm32"),
target_feature = "sse2",
not(target_feature = "avx2"),
))]
mod sse2;
#[cfg(all(
not(target_arch = "wasm32"),
target_feature = "sse2",
not(target_feature = "avx2"),
))]
pub use self::sse2::*;

// AVX2 backend: any non-wasm32 build with AVX2 enabled at compile time.
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
mod avx2;
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
pub use self::avx2::*;

// wasm32 backend: selected purely by target architecture.
#[cfg(target_arch = "wasm32")]
mod wasm32;
#[cfg(target_arch = "wasm32")]
pub use self::wasm32::*;

// Marker impl: `Ord` requires `Eq`; the actual comparison comes from the
// selected backend's `PartialEq`.
impl Eq for Block {}

impl PartialOrd for Block {
    /// Always returns `Some`: the partial order simply defers to [`Ord::cmp`].
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let total = Ord::cmp(self, other);
        Some(total)
    }
}

impl Ord for Block {
    /// Compares two blocks word by word, starting at index 0 of their `usize`
    /// arrays; the first pair of differing words decides the result.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self.into_usize_array();
        let rhs = other.into_usize_array();
        (0..Self::USIZE_COUNT)
            .map(|word| lhs[word].cmp(&rhs[word]))
            .find(|ordering| *ordering != Ordering::Equal)
            // No differing word found: the blocks are equal.
            .unwrap_or(Ordering::Equal)
    }
}

impl Default for Block {
#[inline]
fn default() -> Self {
Self::NONE
}
}

impl Hash for Block {
    /// Hashes the block through its `usize`-array representation.
    #[inline]
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        let words = self.into_usize_array();
        Hash::hash(&words, hasher);
    }
}
Loading
Loading