diff --git a/src/protocol/frame/mask.rs b/src/protocol/frame/mask.rs
index b357795..28f0eaf 100644
--- a/src/protocol/frame/mask.rs
+++ b/src/protocol/frame/mask.rs
@@ -1,9 +1,3 @@
-use rand;
-use std::cmp::min;
-#[allow(deprecated)]
-use std::mem::uninitialized;
-use std::ptr::{copy_nonoverlapping, read_unaligned};
-
 /// Generate a random frame mask.
 #[inline]
 pub fn generate_mask() -> [u8; 4] {
@@ -18,7 +12,6 @@ pub fn apply_mask(buf: &mut [u8], mask: [u8; 4]) {
 
 /// A safe unoptimized mask application.
 #[inline]
-#[allow(dead_code)]
 fn apply_mask_fallback(buf: &mut [u8], mask: [u8; 4]) {
     for (i, byte) in buf.iter_mut().enumerate() {
         *byte ^= mask[i & 3];
@@ -27,21 +20,13 @@ fn apply_mask_fallback(buf: &mut [u8], mask: [u8; 4]) {
 
 /// Faster version of `apply_mask()` which operates on 4-byte blocks.
 #[inline]
-#[allow(dead_code, clippy::cast_ptr_alignment)]
-fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) {
-    let mask_u32: u32 = unsafe { read_unaligned(mask.as_ptr() as *const u32) };
-
-    let mut ptr = buf.as_mut_ptr();
-    let mut len = buf.len();
+pub fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) {
+    let mask_u32 = u32::from_ne_bytes(mask);
 
-    // Possible first unaligned block.
-    let head = min(len, (4 - (ptr as usize & 3)) & 3);
+    let (mut prefix, words, mut suffix) = unsafe { buf.align_to_mut::<u32>() };
+    apply_mask_fallback(&mut prefix, mask);
+    let head = prefix.len() & 3;
     let mask_u32 = if head > 0 {
-        unsafe {
-            xor_mem(ptr, mask_u32, head);
-            ptr = ptr.add(head);
-        }
-        len -= head;
         if cfg!(target_endian = "big") {
             mask_u32.rotate_left(8 * head as u32)
         } else {
@@ -50,45 +35,15 @@ fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) {
     } else {
         mask_u32
     };
-
-    if len > 0 {
-        debug_assert_eq!(ptr as usize % 4, 0);
-    }
-
-    // Properly aligned middle of the data.
-    while len > 4 {
-        unsafe {
-            *(ptr as *mut u32) ^= mask_u32;
-            ptr = ptr.offset(4);
-            len -= 4;
-        }
-    }
-
-    // Possible last block.
-    if len > 0 {
-        unsafe {
-            xor_mem(ptr, mask_u32, len);
-        }
+    for word in words.iter_mut() {
+        *word ^= mask_u32;
     }
-}
-
-#[inline]
-// TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so inefficient,
-// it could be done better. The compiler does not see that len is limited to 3.
-unsafe fn xor_mem(ptr: *mut u8, mask: u32, len: usize) {
-    #[allow(deprecated)]
-    let mut b: u32 = uninitialized();
-    #[allow(trivial_casts)]
-    copy_nonoverlapping(ptr, &mut b as *mut _ as *mut u8, len);
-    b ^= mask;
-    #[allow(trivial_casts)]
-    copy_nonoverlapping(&b as *const _ as *const u8, ptr, len);
+    apply_mask_fallback(&mut suffix, mask_u32.to_ne_bytes());
 }
 
 #[cfg(test)]
 mod tests {
-
-    use super::{apply_mask_fallback, apply_mask_fast32};
+    use super::*;
 
     #[test]
     fn test_apply_mask() {
@@ -98,26 +53,21 @@ mod tests {
             0x12, 0x03,
         ];
 
-        // Check masking with proper alignment.
-        {
-            let mut masked = unmasked.clone();
-            apply_mask_fallback(&mut masked, mask);
-
-            let mut masked_fast = unmasked.clone();
-            apply_mask_fast32(&mut masked_fast, mask);
-
-            assert_eq!(masked, masked_fast);
-        }
-
-        // Check masking without alignment.
-        {
-            let mut masked = unmasked.clone();
-            apply_mask_fallback(&mut masked[1..], mask);
-
-            let mut masked_fast = unmasked.clone();
-            apply_mask_fast32(&mut masked_fast[1..], mask);
-
-            assert_eq!(masked, masked_fast);
+        for data_len in 0..=unmasked.len() {
+            let unmasked = &unmasked[0..data_len];
+            // Check masking with different alignment.
+            for off in 0..=3 {
+                if unmasked.len() < off {
+                    continue;
+                }
+                let mut masked = unmasked.to_vec();
+                apply_mask_fallback(&mut masked[off..], mask);
+
+                let mut masked_fast = unmasked.to_vec();
+                apply_mask_fast32(&mut masked_fast[off..], mask);
+
+                assert_eq!(masked, masked_fast);
+            }
         }
     }
 }
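For reference, here is a small self-contained sketch of the idea behind the new `apply_mask_fast32`: XOR the unaligned prefix byte by byte, rotate the packed mask by the prefix length so it stays in phase with the following 4-byte words, and finish any short tail with the rotated mask bytes. The buffer contents, the `head` offsets, and the `main` wrapper are illustrative assumptions, not code from this patch; the sketch only checks that the word-wise result matches plain byte-wise masking.

```rust
// Standalone sketch (not part of the patch): why the packed mask is rotated
// by the length of the unaligned prefix before the word-wise XOR.
fn main() {
    let mask = [0x6d, 0xb6, 0xb2, 0x80u8];
    let data: Vec<u8> = (0u8..32).collect();

    for head in 0..4usize {
        // Reference result: plain byte-wise masking of the whole buffer.
        let mut expected = data.clone();
        for (i, b) in expected.iter_mut().enumerate() {
            *b ^= mask[i & 3];
        }

        // Word-wise variant: mask the first `head` bytes one at a time, then
        // rotate the packed mask so it lines up with the 4-byte words that follow.
        let mut actual = data.clone();
        for (i, b) in actual[..head].iter_mut().enumerate() {
            *b ^= mask[i & 3];
        }
        let mut mask_u32 = u32::from_ne_bytes(mask);
        if head > 0 {
            mask_u32 = if cfg!(target_endian = "big") {
                mask_u32.rotate_left(8 * head as u32)
            } else {
                mask_u32.rotate_right(8 * head as u32)
            };
        }
        for chunk in actual[head..].chunks_exact_mut(4) {
            let word = u32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            chunk.copy_from_slice(&(word ^ mask_u32).to_ne_bytes());
        }
        // Any tail shorter than a word uses the rotated mask bytes, mirroring
        // `apply_mask_fallback(&mut suffix, mask_u32.to_ne_bytes())` in the patch.
        let tail_start = head + (actual.len() - head) / 4 * 4;
        let rotated = mask_u32.to_ne_bytes();
        for (i, b) in actual[tail_start..].iter_mut().enumerate() {
            *b ^= rotated[i & 3];
        }

        assert_eq!(expected, actual, "mismatch for head = {}", head);
    }
    println!("rotated-mask word masking matches byte-wise masking");
}
```

The sketch emulates unaligned starts by varying `head` manually; in the patch itself the split point comes from `align_to_mut::<u32>()`, which is what lets the hot loop run over properly aligned `u32` words without any raw pointer arithmetic.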