Fix fast masking of unaligned data

Signed-off-by: Alexey Galakhov <agalakhov@snapview.de>
pull/12/head
Alexey Galakhov 8 years ago
parent 01a9211a84
commit 3abe419e98
  1. 106
      src/protocol/frame/mask.rs

@ -1,4 +1,6 @@
use std::mem::transmute; use std::cmp::min;
use std::mem::uninitialized;
use std::ptr::copy_nonoverlapping;
use rand; use rand;
/// Generate a random frame mask. /// Generate a random frame mask.
@ -10,13 +12,12 @@ pub fn generate_mask() -> [u8; 4] {
/// Mask/unmask a frame. /// Mask/unmask a frame.
#[inline] #[inline]
pub fn apply_mask(buf: &mut [u8], mask: &[u8; 4]) { pub fn apply_mask(buf: &mut [u8], mask: &[u8; 4]) {
// Assume that the memory is 32-bytes aligned. apply_mask_fast32(buf, mask)
// FIXME: this assumption is not correct.
unsafe { apply_mask_aligned32(buf, mask) }
} }
/// A safe unoptimized mask application. /// A safe unoptimized mask application.
#[inline] #[inline]
#[allow(dead_code)]
fn apply_mask_fallback(buf: &mut [u8], mask: &[u8; 4]) { fn apply_mask_fallback(buf: &mut [u8], mask: &[u8; 4]) {
for (i, byte) in buf.iter_mut().enumerate() { for (i, byte) in buf.iter_mut().enumerate() {
*byte ^= mask[i & 3]; *byte ^= mask[i & 3];
@ -24,30 +25,72 @@ fn apply_mask_fallback(buf: &mut [u8], mask: &[u8; 4]) {
} }
/// Faster version of `apply_mask()` which operates on 4-byte blocks.
///
/// XORs `buf` in place with `mask`, repeated cyclically starting at `buf[0]`.
/// `buf` may have any length and any alignment. The data is processed in three
/// parts:
///
/// 1. up to three leading bytes, until the pointer is 4-byte aligned;
/// 2. the aligned middle, one `u32` word at a time;
/// 3. up to three trailing bytes.
#[inline]
// NOTE(review): presumably kept so that switching `apply_mask` to another
// implementation does not produce a dead-code warning -- confirm.
#[allow(dead_code)]
fn apply_mask_fast32(buf: &mut [u8], mask: &[u8; 4]) {
    // Native byte order: byte `i` of the word in memory equals `mask[i]`.
    let mask_u32 = u32::from_ne_bytes(*mask);

    let mut ptr = buf.as_mut_ptr();
    let mut len = buf.len();

    // Possible first unaligned block.
    let head = min(len, (4 - (ptr as usize & 3)) & 3);
    let mask_u32 = if head > 0 {
        // SAFETY: `head <= len`, so `ptr..ptr + head` lies inside `buf`, and
        // `head <= 3` satisfies the `xor_mem` contract.
        unsafe {
            xor_mem(ptr, mask_u32, head);
            ptr = ptr.add(head);
        }
        len -= head;
        // `head` mask bytes have been consumed; rotate the word so that its
        // first byte in memory is the next mask byte to apply.
        if cfg!(target_endian = "big") {
            mask_u32.rotate_left(8 * head as u32)
        } else {
            mask_u32.rotate_right(8 * head as u32)
        }
    } else {
        mask_u32
    };

    if len > 0 {
        debug_assert_eq!(ptr as usize % 4, 0);
    }

    // Properly aligned middle of the data.
    while len >= 4 {
        // SAFETY: `ptr` is 4-byte aligned (asserted above and advanced only in
        // steps of 4) and at least 4 bytes of `buf` remain past it.
        unsafe {
            *(ptr as *mut u32) ^= mask_u32;
            ptr = ptr.add(4);
        }
        len -= 4;
    }

    // Possible last block with less than 4 bytes.
    if len > 0 {
        // SAFETY: exactly `len` (1..=3) bytes of `buf` remain past `ptr`.
        unsafe { xor_mem(ptr, mask_u32, len); }
    }
}

/// XORs the `len` bytes at `ptr` with the first `len` bytes (in memory order)
/// of `mask`.
///
/// The scratch word starts zeroed rather than coming from
/// `mem::uninitialized()`: producing an uninitialized integer is undefined
/// behaviour, and for `len < 4` the word is only partly overwritten before it
/// is XORed.
///
/// # Safety
///
/// `ptr` must be valid for reads and writes of `len` bytes, and `len` must not
/// exceed 4.
#[inline]
// TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so inefficient,
// it could be done better. The compiler does not see that len is limited to 3.
unsafe fn xor_mem(ptr: *mut u8, mask: u32, len: usize) {
    debug_assert!(len <= 4);
    let mut b: u32 = 0;
    #[allow(trivial_casts)]
    copy_nonoverlapping(ptr, &mut b as *mut _ as *mut u8, len);
    b ^= mask;
    #[allow(trivial_casts)]
    copy_nonoverlapping(&b as *const _ as *const u8, ptr, len);
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{apply_mask_fallback, apply_mask_aligned32}; use super::{apply_mask_fallback, apply_mask_fast32};
#[test] #[test]
fn test_apply_mask() { fn test_apply_mask() {
@ -55,16 +98,31 @@ mod tests {
0x6d, 0xb6, 0xb2, 0x80, 0x6d, 0xb6, 0xb2, 0x80,
]; ];
let unmasked = vec![ let unmasked = vec![
0xf3, 0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0xff, 0xfe, 0x00, 0xf3, 0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82,
0xff, 0xfe, 0x00, 0x17, 0x74, 0xf9, 0x12, 0x03,
]; ];
let mut masked = unmasked.clone(); // Check masking with proper alignment.
apply_mask_fallback(&mut masked, &mask); {
let mut masked = unmasked.clone();
apply_mask_fallback(&mut masked, &mask);
let mut masked_fast = unmasked.clone();
apply_mask_fast32(&mut masked_fast, &mask);
assert_eq!(masked, masked_fast);
}
// Check masking without alignment.
{
let mut masked = unmasked.clone();
apply_mask_fallback(&mut masked[1..], &mask);
let mut masked_aligned = unmasked.clone(); let mut masked_fast = unmasked.clone();
unsafe { apply_mask_aligned32(&mut masked_aligned, &mask) }; apply_mask_fast32(&mut masked_fast[1..], &mask);
assert_eq!(masked, masked_aligned); assert_eq!(masked, masked_fast);
}
} }
} }

Loading…
Cancel
Save