diff --git a/src/protocol/frame/mask.rs b/src/protocol/frame/mask.rs index 0ebcf55..28f0eaf 100644 --- a/src/protocol/frame/mask.rs +++ b/src/protocol/frame/mask.rs @@ -1,8 +1,3 @@ -use std::cmp::min; -#[allow(deprecated)] -use std::mem::uninitialized; -use std::ptr::{copy_nonoverlapping, read_unaligned}; - /// Generate a random frame mask. #[inline] pub fn generate_mask() -> [u8; 4] { @@ -17,7 +12,6 @@ pub fn apply_mask(buf: &mut [u8], mask: [u8; 4]) { /// A safe unoptimized mask application. #[inline] -#[allow(dead_code)] fn apply_mask_fallback(buf: &mut [u8], mask: [u8; 4]) { for (i, byte) in buf.iter_mut().enumerate() { *byte ^= mask[i & 3]; @@ -26,21 +20,13 @@ fn apply_mask_fallback(buf: &mut [u8], mask: [u8; 4]) { /// Faster version of `apply_mask()` which operates on 4-byte blocks. #[inline] -#[allow(dead_code, clippy::cast_ptr_alignment)] -fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) { - let mask_u32: u32 = unsafe { read_unaligned(mask.as_ptr() as *const u32) }; - - let mut ptr = buf.as_mut_ptr(); - let mut len = buf.len(); +pub fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) { + let mask_u32 = u32::from_ne_bytes(mask); - // Possible first unaligned block. - let head = min(len, (4 - (ptr as usize & 3)) & 3); + let (mut prefix, words, mut suffix) = unsafe { buf.align_to_mut::<u32>() }; + apply_mask_fallback(&mut prefix, mask); + let head = prefix.len() & 3; let mask_u32 = if head > 0 { - unsafe { - xor_mem(ptr, mask_u32, head); - ptr = ptr.add(head); - } - len -= head; if cfg!(target_endian = "big") { mask_u32.rotate_left(8 * head as u32) } else { @@ -49,39 +35,10 @@ fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) { } else { mask_u32 }; - - if len > 0 { - debug_assert_eq!(ptr as usize % 4, 0); - } - - // Properly aligned middle of the data. - while len > 4 { - unsafe { - *(ptr as *mut u32) ^= mask_u32; - ptr = ptr.offset(4); - len -= 4; - } + for word in words.iter_mut() { + *word ^= mask_u32; } - - // Possible last block. 
- if len > 0 { - unsafe { - xor_mem(ptr, mask_u32, len); - } - } -} - -#[inline] -// TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so inefficient, -// it could be done better. The compiler does not see that len is limited to 3. -unsafe fn xor_mem(ptr: *mut u8, mask: u32, len: usize) { - #[allow(deprecated)] - let mut b: u32 = uninitialized(); - #[allow(trivial_casts)] - copy_nonoverlapping(ptr, &mut b as *mut _ as *mut u8, len); - b ^= mask; - #[allow(trivial_casts)] - copy_nonoverlapping(&b as *const _ as *const u8, ptr, len); + apply_mask_fallback(&mut suffix, mask_u32.to_ne_bytes()); } #[cfg(test)]