# Review preamble. These lines precede the first `diff --git` header and belong to no hunk.
#
# What the patch changes:
#   * .github/workflows/nix.yml: the benchmark `fail-threshold` goes from '120%' to '130%';
#     `alert-threshold` stays at '110%'.
#   * src/decoder/complete.rs, src/decoder/incomplete.rs, src/encoder.rs: the ASCII fast-path
#     checks call `crate::is_ascii` / `crate::is_ascii_str` instead of the `is_ascii()` methods.
#   * src/lib.rs: declares the private `simd` module and re-exports both helpers as `pub(crate)`.
#   * src/simd.rs (new file):
#       - `is_ascii` calls `is_ascii_avx512` on x86_64 when
#         `is_x86_feature_detected!("avx512bw")` is true, and otherwise returns `bytes.is_ascii()`.
#       - `is_ascii_str` forwards to `is_ascii(s.as_bytes())`.
#       - `is_ascii_avx512` walks the slice in 64-byte steps and returns `false` as soon as
#         `_mm512_movepi8_mask` of a loaded chunk is non-zero. When 1..=63 bytes remain it
#         builds a mask with the low `len - i` bits set and repeats the test on a
#         `_mm512_maskz_loadu_epi8` load. It returns `true` if no test fails.
#
# NOTE(review): this patch arrived with its line breaks collapsed. Line structure and the
#   leading whitespace inside hunks were reconstructed from the hunk line counts and
#   conventional formatting; confirm against the repository before applying.
# NOTE(review): some context lines read `Option,` and `Result, DecodeError>`. This looks like
#   generic arguments stripped during extraction; they are kept exactly as received. TODO confirm.
# NOTE(review): in `is_ascii_avx512` the `// SAFETY:` comments sit on the `while` loop and on
#   the `let mask` line, and the `/// # Safety` doc follows the attributes without a summary
#   line. Consider attaching each SAFETY note to an explicit `unsafe { .. }` block around the
#   pointer load in a follow-up change.

diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml
index 5f9d424..af19625 100644
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -102,7 +102,7 @@ jobs:
           auto-push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
           alert-threshold: '110%'
           fail-on-alert: true
-          fail-threshold: '120%'
+          fail-threshold: '130%'
           comment-on-alert: true
           comment-always: ${{ github.event_name == 'pull_request' }}
           benchmark-data-dir-path: dev/bench
diff --git a/src/decoder/complete.rs b/src/decoder/complete.rs
index 72eed34..67cc61d 100644
--- a/src/decoder/complete.rs
+++ b/src/decoder/complete.rs
@@ -31,7 +31,7 @@ pub(crate) type Table = [Entry; 256];

 #[inline(always)]
 pub(crate) fn decode_helper<'a>(table: &Table, src: &'a [u8]) -> Cow<'a, str> {
-    if src.is_ascii() {
+    if crate::is_ascii(src) {
         let s = unsafe { std::str::from_utf8_unchecked(src) };
         return s.into();
     }
diff --git a/src/decoder/incomplete.rs b/src/decoder/incomplete.rs
index adbb8e7..f0b6bbd 100644
--- a/src/decoder/incomplete.rs
+++ b/src/decoder/incomplete.rs
@@ -60,7 +60,7 @@ pub(crate) fn decode_helper<'a>(
     fallback: Option,
 ) -> Result, DecodeError> {
     let fallback: Option = fallback.map(Entry::from_char);
-    if bytes.is_ascii() {
+    if crate::is_ascii(bytes) {
         let s = unsafe { std::str::from_utf8_unchecked(bytes) };
         return Ok(s.into());
     }
diff --git a/src/encoder.rs b/src/encoder.rs
index a41d340..44809f3 100644
--- a/src/encoder.rs
+++ b/src/encoder.rs
@@ -12,7 +12,7 @@ pub trait Encoder {
         fallback: Option,
     ) -> Result, EncodeError> {
         let mut src = s.as_bytes();
-        if s.is_ascii() {
+        if crate::is_ascii_str(s) {
             return Ok(src.into());
         }
         let len = s.chars().count();
diff --git a/src/lib.rs b/src/lib.rs
index 0997cb8..01ea3fa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,7 +5,9 @@ use thiserror::Error;
 pub mod code_pages;
 pub(crate) mod decoder;
 mod encoder;
+mod simd;
 pub(crate) use encoder::Encoder;
+pub(crate) use simd::{is_ascii, is_ascii_str};

 #[derive(Error, Debug)]
 #[error("Character in UTF-8 string has no mapping defined in code page")]
diff --git a/src/simd.rs b/src/simd.rs
new file mode 100644
index 0000000..3b781c1
--- /dev/null
+++ b/src/simd.rs
@@ -0,0 +1,51 @@
+//! SIMD-optimized ASCII detection.
+//!
+//! Works around LLVM codegen issues with `-C target-cpu=native` on AVX512 CPUs.
+
+/// Check if all bytes are ASCII (< 128).
+#[inline]
+pub fn is_ascii(bytes: &[u8]) -> bool {
+    #[cfg(target_arch = "x86_64")]
+    if is_x86_feature_detected!("avx512bw") {
+        return unsafe { is_ascii_avx512(bytes) };
+    }
+    bytes.is_ascii()
+}
+
+/// Check if all characters in the string are ASCII.
+#[inline]
+pub fn is_ascii_str(s: &str) -> bool {
+    is_ascii(s.as_bytes())
+}
+
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx512bw")]
+/// # Safety
+/// Caller must ensure AVX512BW is available (via `is_x86_feature_detected!`).
+unsafe fn is_ascii_avx512(bytes: &[u8]) -> bool {
+    use core::arch::x86_64::*;
+
+    let ptr = bytes.as_ptr();
+    let len = bytes.len();
+    let mut i = 0;
+
+    // Process 64-byte chunks
+    // SAFETY: Loop condition ensures i+64 <= len
+    while i + 64 <= len {
+        if _mm512_movepi8_mask(_mm512_loadu_si512(ptr.add(i).cast())) != 0 {
+            return false;
+        }
+        i += 64;
+    }
+
+    // Tail: masked load for remaining bytes
+    if i < len {
+        // SAFETY: Mask has only `len - i` bits set, so only valid bytes are loaded
+        let mask = (1u64 << (len - i)) - 1;
+        if _mm512_movepi8_mask(_mm512_maskz_loadu_epi8(mask, ptr.add(i).cast())) != 0 {
+            return false;
+        }
+    }
+
+    true
+}