From 47fadea431dc25f55574140fef9bd2a07c7adedd Mon Sep 17 00:00:00 2001
From: Pierre Chifflier
Date: Mon, 25 Nov 2024 12:36:33 +0100
Subject: [PATCH] Remove fasthash and use toeplitz hash to dispatch jobs
 (Closes #57)

---
 libpcap-analyzer/Cargo.toml               |   1 -
 libpcap-analyzer/src/threaded_analyzer.rs |  31 ++---
 libpcap-analyzer/src/toeplitz.rs          | 157 ++++++++++++++++++----
 3 files changed, 142 insertions(+), 47 deletions(-)

diff --git a/libpcap-analyzer/Cargo.toml b/libpcap-analyzer/Cargo.toml
index 8ca1256..3b718b6 100644
--- a/libpcap-analyzer/Cargo.toml
+++ b/libpcap-analyzer/Cargo.toml
@@ -35,7 +35,6 @@ plugin_tls_stats = ["rusticata","tls-parser"]
 base16ct = { version="0.2", features=["alloc"], optional=true }
 base64ct = { version="1.5", features=["alloc"], optional=true }
 crossbeam-channel = "0.5"
-fasthash = "0.4"
 fnv = "1.0"
 indexmap = { version="2.2", features=["serde"] }
 lazy_static = "1.2"
diff --git a/libpcap-analyzer/src/threaded_analyzer.rs b/libpcap-analyzer/src/threaded_analyzer.rs
index ac605d1..2256b8d 100644
--- a/libpcap-analyzer/src/threaded_analyzer.rs
+++ b/libpcap-analyzer/src/threaded_analyzer.rs
@@ -1,6 +1,7 @@
 use crate::analyzer::{handle_l3, run_plugins_v2_link, run_plugins_v2_physical, Analyzer};
 use crate::layers::LinkLayerType;
 use crate::plugin_registry::PluginRegistry;
+use crate::toeplitz;
 use crossbeam_channel::{unbounded, Receiver, Sender};
 use libpcap_tools::*;
 use pcap_parser::data::PacketData;
@@ -220,16 +221,10 @@ fn fan_out(data: &[u8], ethertype: EtherType, n_workers: usize) -> usize {
     match ethertype {
         EtherTypes::Ipv4 => {
             if data.len() >= 20 {
-                // let src = &data[12..15];
-                // let dst = &data[16..19];
-                // let proto = data[9];
-                // (src[0] ^ dst[0] ^ proto) as usize % n_workers
                 let mut buf: [u8; 20] = [0; 20];
-                let sz = 4;
-                buf[0] = data[12] ^ data[16];
-                buf[1] = data[13] ^ data[17];
-                buf[2] = data[14] ^ data[18];
-                buf[3] = data[15] ^ data[19];
+                let sz = 8;
+                let src_dst_addrs = &data[12..20];
+                buf[0..sz].copy_from_slice(src_dst_addrs);
                 // we may append source and destination ports
                 // XXX breaks fragmentation
                 // if data[9] == crate::plugin::TRANSPORT_TCP || data[9] == crate::plugin::TRANSPORT_UDP {
@@ -243,8 +238,8 @@ fn fan_out(data: &[u8], ethertype: EtherType, n_workers: usize) -> usize {
                 //         sz = 12;
                 //     }
                 // }
-                // let hash = crate::toeplitz::toeplitz_hash(crate::toeplitz::KEY, &buf[..sz]);
-                let hash = fasthash::metro::hash64(&buf[..sz]);
+                let hash = toeplitz::toeplitz_hash(toeplitz::SYMMETRIC_KEY, &buf[..sz]);
+
                 // debug!("{:?} -- hash --> 0x{:x}", buf, hash);
                 // ((hash >> 24) ^ (hash & 0xff)) as usize % n_workers
                 hash as usize % n_workers
@@ -255,13 +250,9 @@ fn fan_out(data: &[u8], ethertype: EtherType, n_workers: usize) -> usize {
         EtherTypes::Ipv6 => {
             if data.len() >= 40 {
                 let mut buf: [u8; 40] = [0; 40];
-                // let sz = 32;
-                // source IP + destination IP, in network-order
-                // buf[0..32].copy_from_slice(&data[8..40]);
-                let sz = 16;
-                for i in 0..16 {
-                    buf[i] = data[8 + i] ^ data[24 + i];
-                }
+                let sz = 32; // two 16-byte addresses; must equal src_dst_addrs.len()
+                let src_dst_addrs = &data[8..40];
+                buf[0..sz].copy_from_slice(src_dst_addrs);
                 // we may append source and destination ports
                 // XXX breaks fragmentation
                 // if data[6] == crate::plugin::TRANSPORT_TCP || data[6] == crate::plugin::TRANSPORT_UDP {
@@ -275,8 +266,8 @@ fn fan_out(data: &[u8], ethertype: EtherType, n_workers: usize) -> usize {
                 //         sz += 4;
                 //     }
                 // }
-                // let hash = crate::toeplitz::toeplitz_hash(crate::toeplitz::KEY, &buf[..sz]);
-                let hash = fasthash::metro::hash64(&buf[..sz]);
+                let hash = toeplitz::toeplitz_hash(toeplitz::SYMMETRIC_KEY, &buf[..sz]);
+
                 // debug!("{:?} -- hash --> 0x{:x}", buf, hash);
                 // ((hash >> 24) ^ (hash & 0xff)) as usize % n_workers
                 hash as usize % n_workers
diff --git a/libpcap-analyzer/src/toeplitz.rs b/libpcap-analyzer/src/toeplitz.rs
index d3dcd5a..c3477e9 100644
--- a/libpcap-analyzer/src/toeplitz.rs
+++ b/libpcap-analyzer/src/toeplitz.rs
@@ -2,26 +2,27 @@
 /// first 16 bytes, which is all that's required for IPv4.
 pub const RSS_KEYSIZE: usize = 40;
 
-// // original Microsoft's key
-// pub const KEY_OLD : &[u8] = &[
-//     0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
-//     0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
-//     0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
-//     0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
-//     0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
-//     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-//     0x00, 0x00, 0x00, 0x00
-// ];
+// original Microsoft's key
+#[rustfmt::skip]
+pub const DEFAULT_KEY : &[u8] = &[
+    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00
+];
 
 // key from http://www.ndsl.kaist.edu/~shinae/papers/TR-symRSS.pdf
 //
 // Let’s assume we have a frame IP source: 1.1.1.1, IP destination: 2.2.2.2 and UDP port 22 to udp
-// port 55. This means that the input for the hash function of the 4 tupples will be:
+// port 55. This means that the input for the hash function of the 4 tuples will be:
 // [1.1.1.1][2.2.2.2][22][55] and for the opposite direction: [2.2.2.2][1.1.1.1][55][22]. To
 // support the same hash value for these two inputs, the first 32bit of the key need to be
 // identical to the second 32bit, and the 16bit afterwards should be identical to the next 16bit.
 #[rustfmt::skip]
-pub const KEY : &[u8] = &[
+pub const SYMMETRIC_KEY : &[u8] = &[
     0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
     0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
     0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
@@ -54,25 +55,129 @@ pub fn toeplitz_hash(key: &[u8], data: &[u8]) -> u32 {
 
 #[cfg(test)]
 mod tests {
-    use super::toeplitz_hash;
-    // original Microsoft's key
-    #[rustfmt::skip]
-    pub const KEY_OLD : &[u8] = &[
-        0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
-        0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
-        0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
-        0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
-        0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00
-    ];
+    use super::*;
+    use std::{
+        net::{IpAddr, Ipv4Addr, Ipv6Addr},
+        str::FromStr,
+    };
+
     #[test]
     fn toeplitz_hash_test() {
         const DATA1: &[u8] = &[66, 9, 149, 187, 161, 142, 100, 80, 10, 234, 6, 230];
-        let res = toeplitz_hash(KEY_OLD, DATA1);
+        let res = toeplitz_hash(DEFAULT_KEY, DATA1);
         assert_eq!(res, 0x51cc_c178);
         const DATA2: &[u8] = &[199, 92, 111, 2, 65, 69, 140, 83, 55, 150, 18, 131];
-        let res = toeplitz_hash(KEY_OLD, DATA2);
+        let res = toeplitz_hash(DEFAULT_KEY, DATA2);
         assert_eq!(res, 0xc626_b0ea);
     }
+
+    // test vectors inspired by https://github.com/sarub0b0/toeplitz-hash/blob/master/toeplitz_hash.cc
+    #[derive(Debug)]
+    struct TestVector {
+        src_addr: IpAddr,
+        dst_addr: IpAddr,
+        src_port: u16,
+        dst_port: u16,
+        with_tcp_hash: u32,
+        without_tcp_hash: u32,
+    }
+
+    fn create_test_vectors() -> Vec<TestVector> {
+        let mut v = Vec::new();
+
+        const TESTV4_1: TestVector = TestVector {
+            src_addr: IpAddr::V4(Ipv4Addr::new(66, 9, 149, 187)),
+            dst_addr: IpAddr::V4(Ipv4Addr::new(161, 142, 100, 80)),
+            src_port: 2794,
+            dst_port: 1766,
+            with_tcp_hash: 0x51cc_c178,
+            without_tcp_hash: 0x323e_8fc2,
+        };
+        v.push(TESTV4_1);
+
+        let testv6_1: TestVector = TestVector {
+            src_addr: IpAddr::V6(Ipv6Addr::from_str("3ffe:2501:200:1fff::7").unwrap()),
+            dst_addr: IpAddr::V6(Ipv6Addr::from_str("3ffe:2501:200:3::1").unwrap()),
+            src_port: 2794,
+            dst_port: 1766,
+            with_tcp_hash: 0x4020_7d3d,
+            without_tcp_hash: 0x2cc1_8cd5,
+        };
+        v.push(testv6_1);
+
+        v
+    }
+
+    #[rustfmt::skip]
+    fn prepare_buffer(src_addr: IpAddr, dst_addr: IpAddr, src_port: u16, dst_port: u16) -> ([u8;40], usize) {
+        let mut buf = [0u8; 40];
+        let sz = match src_addr {
+            IpAddr::V4(v4) => { buf[..4].copy_from_slice(&v4.octets()); 4 },
+            IpAddr::V6(v6) => { buf[..16].copy_from_slice(&v6.octets()); 16 },
+        };
+        let sz = match dst_addr {
+            IpAddr::V4(v4) => { buf[4..8].copy_from_slice(&v4.octets()); sz+4 },
+            IpAddr::V6(v6) => { buf[16..32].copy_from_slice(&v6.octets()); sz+16 },
+        };
+        buf[sz    ] = ((src_port & 0xff00) >> 8) as u8;
+        buf[sz + 1] = (src_port & 0x00ff) as u8;
+        buf[sz + 2] = ((dst_port & 0xff00) >> 8) as u8;
+        buf[sz + 3] = (dst_port & 0x00ff) as u8;
+
+        (buf, sz + 4)
+    }
+
+    #[test]
+    fn toeplitz_test_vectors() {
+        let test_vectors = create_test_vectors();
+        for v in &test_vectors {
+            // println!("{:?}", v);
+            let (buf, sz) = prepare_buffer(v.src_addr, v.dst_addr, v.src_port, v.dst_port);
+            let without_tcp_hash = toeplitz_hash(DEFAULT_KEY, &buf[..sz - 4]);
+            // println!("{:02x?}", without_tcp_hash);
+            assert_eq!(without_tcp_hash, v.without_tcp_hash);
+
+            let with_tcp_hash = toeplitz_hash(DEFAULT_KEY, &buf[..sz]);
+            // println!("{:02x?}", with_tcp_hash);
+            assert_eq!(with_tcp_hash, v.with_tcp_hash);
+        }
+    }
+
+    // Test hash symmetry
+    // Note that we use a different key for hashes (default one creates symmetric hashes for
+    // IPv4/IPv6 only, but not when adding ports)
+    #[test]
+    fn toeplitz_hash_symmetry() {
+        let test_vectors = create_test_vectors();
+        for v in &test_vectors {
+            let v_sym = TestVector {
+                src_addr: v.dst_addr, // reverse direction: endpoints swapped
+                dst_addr: v.src_addr,
+                src_port: v.dst_port,
+                dst_port: v.src_port,
+                with_tcp_hash: v.with_tcp_hash,
+                without_tcp_hash: v.without_tcp_hash,
+            };
+
+            let (buf, sz) = prepare_buffer(v.src_addr, v.dst_addr, v.src_port, v.dst_port);
+            let without_tcp_hash = toeplitz_hash(SYMMETRIC_KEY, &buf[..sz - 4]);
+            let (buf2, sz2) = prepare_buffer(
+                v_sym.src_addr,
+                v_sym.dst_addr,
+                v_sym.src_port,
+                v_sym.dst_port,
+            );
+            let without_tcp_hash_sym = toeplitz_hash(SYMMETRIC_KEY, &buf2[..sz2 - 4]);
+            // println!("{:02x?}", without_tcp_hash);
+            assert_eq!(
+                without_tcp_hash, without_tcp_hash_sym,
+                "Symmetry without ports"
+            );
+
+            let with_tcp_hash = toeplitz_hash(SYMMETRIC_KEY, &buf[..sz]);
+            let with_tcp_hash_sym = toeplitz_hash(SYMMETRIC_KEY, &buf2[..sz2]);
+            // println!("{:02x?}", with_tcp_hash);
+            assert_eq!(with_tcp_hash, with_tcp_hash_sym, "Symmetry with ports");
+        }
+    }
 }