diff --git a/Cargo.toml b/Cargo.toml index 2337bc6..e313c82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,9 +7,9 @@ authors = ["Alexey Galakhov"] license = "MIT" readme = "README.md" homepage = "https://github.com/snapview/tungstenite-rs" -documentation = "https://docs.rs/tungstenite/0.2.0" +documentation = "https://docs.rs/tungstenite/0.2.1" repository = "https://github.com/snapview/tungstenite-rs" -version = "0.2.0" +version = "0.2.1" [features] default = ["tls"] diff --git a/src/protocol/frame/frame.rs b/src/protocol/frame/frame.rs index 3bdc539..352efab 100644 --- a/src/protocol/frame/frame.rs +++ b/src/protocol/frame/frame.rs @@ -8,41 +8,9 @@ use std::result::Result as StdResult; use byteorder::{ByteOrder, ReadBytesExt, NetworkEndian}; use bytes::BufMut; -use rand; - use error::{Error, Result}; use super::coding::{OpCode, Control, Data, CloseCode}; - -fn apply_mask(buf: &mut [u8], mask: &[u8; 4]) { - for (i, byte) in buf.iter_mut().enumerate() { - *byte ^= mask[i & 3]; - } -} - -/// Faster version of `apply_mask()` which operates on 4-byte blocks. -/// -/// Safety: `buf` must be at least 4-bytes aligned. -unsafe fn apply_mask_aligned32(buf: &mut [u8], mask: &[u8; 4]) { - debug_assert_eq!(buf.as_ptr() as usize % 4, 0); - - let mask_u32 = transmute(*mask); - - let mut ptr = buf.as_mut_ptr() as *mut u32; - for _ in 0..(buf.len() / 4) { - *ptr ^= mask_u32; - ptr = ptr.offset(1); - } - - // Possible last block with less than 4 bytes. - let last_block_start = buf.len() & !3; - let last_block = &mut buf[last_block_start..]; - apply_mask(last_block, mask); -} - -#[inline] -fn generate_mask() -> [u8; 4] { - rand::random() -} +use super::mask::{generate_mask, apply_mask}; /// A struct representing the close command. #[derive(Debug, Clone)] @@ -219,10 +187,7 @@ impl Frame { #[inline] pub fn remove_mask(&mut self) { self.mask.and_then(|mask| { - // Assumes Vec's backing memory is at least 4-bytes aligned. - unsafe { - Some(apply_mask_aligned32(&mut self.payload, &mask)) - } + Some(apply_mask(&mut self.payload, &mask)) }); self.mask = None; } @@ -471,10 +436,7 @@ impl Frame { if self.is_masked() { let mask = self.mask.take().unwrap(); - // Assumes Vec's backing memory is at least 4-bytes aligned. - unsafe { - apply_mask_aligned32(&mut self.payload, &mask); - } + apply_mask(&mut self.payload, &mask); try!(w.write(&mask)); } @@ -528,24 +490,6 @@ mod tests { use super::super::coding::{OpCode, Data}; use std::io::Cursor; - #[test] - fn test_apply_mask() { - let mask = [ - 0x6d, 0xb6, 0xb2, 0x80, - ]; - let unmasked = vec![ - 0xf3, 0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0xff, 0xfe, 0x00, - ]; - - let mut masked = unmasked.clone(); - apply_mask(&mut masked, &mask); - - let mut masked_aligned = unmasked.clone(); - unsafe { apply_mask_aligned32(&mut masked_aligned, &mask) }; - - assert_eq!(masked, masked_aligned); - } - #[test] fn parse() { let mut raw: Cursor> = Cursor::new(vec![ diff --git a/src/protocol/frame/mask.rs b/src/protocol/frame/mask.rs new file mode 100644 index 0000000..32ca225 --- /dev/null +++ b/src/protocol/frame/mask.rs @@ -0,0 +1,129 @@ +use std::cmp::min; +use std::mem::uninitialized; +use std::ptr::copy_nonoverlapping; +use rand; + +/// Generate a random frame mask. +#[inline] +pub fn generate_mask() -> [u8; 4] { + rand::random() +} + +/// Mask/unmask a frame. +#[inline] +pub fn apply_mask(buf: &mut [u8], mask: &[u8; 4]) { + apply_mask_fast32(buf, mask) +} + +/// A safe unoptimized mask application. +#[inline] +#[allow(dead_code)] +fn apply_mask_fallback(buf: &mut [u8], mask: &[u8; 4]) { + for (i, byte) in buf.iter_mut().enumerate() { + *byte ^= mask[i & 3]; + } +} + +/// Faster version of `apply_mask()` which operates on 4-byte blocks. +#[inline] +#[allow(dead_code)] +fn apply_mask_fast32(buf: &mut [u8], mask: &[u8; 4]) { + // TODO replace this with read_unaligned() as it stabilizes. + let mask_u32 = unsafe { + let mut m: u32 = uninitialized(); + #[allow(trivial_casts)] + copy_nonoverlapping(mask.as_ptr(), &mut m as *mut _ as *mut u8, 4); + m + }; + + let mut ptr = buf.as_mut_ptr(); + let mut len = buf.len(); + + // Possible first unaligned block. + let head = min(len, (4 - (ptr as usize & 3)) & 3); + let mask_u32 = if head > 0 { + unsafe { + xor_mem(ptr, mask_u32, head); + ptr = ptr.offset(head as isize); + } + len -= head; + if cfg!(target_endian = "big") { + mask_u32.rotate_left(8 * head as u32) + } else { + mask_u32.rotate_right(8 * head as u32) + } + } else { + mask_u32 + }; + + if len > 0 { + debug_assert_eq!(ptr as usize % 4, 0); + } + + // Properly aligned middle of the data. + while len > 4 { + unsafe { + *(ptr as *mut u32) ^= mask_u32; + ptr = ptr.offset(4); + len -= 4; + } + } + + // Possible last block. + if len > 0 { + unsafe { xor_mem(ptr, mask_u32, len); } + } +} + +#[inline] +// TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so inefficient, +// it could be done better. The compiler does not see that len is limited to 3. +unsafe fn xor_mem(ptr: *mut u8, mask: u32, len: usize) { + let mut b: u32 = uninitialized(); + #[allow(trivial_casts)] + copy_nonoverlapping(ptr, &mut b as *mut _ as *mut u8, len); + b ^= mask; + #[allow(trivial_casts)] + copy_nonoverlapping(&b as *const _ as *const u8, ptr, len); +} + +#[cfg(test)] +mod tests { + + use super::{apply_mask_fallback, apply_mask_fast32}; + + #[test] + fn test_apply_mask() { + let mask = [ + 0x6d, 0xb6, 0xb2, 0x80, + ]; + let unmasked = vec![ + 0xf3, 0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, + 0xff, 0xfe, 0x00, 0x17, 0x74, 0xf9, 0x12, 0x03, + ]; + + // Check masking with proper alignment. + { + let mut masked = unmasked.clone(); + apply_mask_fallback(&mut masked, &mask); + + let mut masked_fast = unmasked.clone(); + apply_mask_fast32(&mut masked_fast, &mask); + + assert_eq!(masked, masked_fast); + } + + // Check masking without alignment. + { + let mut masked = unmasked.clone(); + apply_mask_fallback(&mut masked[1..], &mask); + + let mut masked_fast = unmasked.clone(); + apply_mask_fast32(&mut masked_fast[1..], &mask); + + assert_eq!(masked, masked_fast); + } + } + +} + diff --git a/src/protocol/frame/mod.rs b/src/protocol/frame/mod.rs index 8973db7..4b6a711 100644 --- a/src/protocol/frame/mod.rs +++ b/src/protocol/frame/mod.rs @@ -3,6 +3,7 @@ pub mod coding; mod frame; +mod mask; pub use self::frame::Frame; pub use self::frame::CloseFrame;