From e7a234a63cb658bfe2d2f14182736da48d7635e5 Mon Sep 17 00:00:00 2001 From: tyurek Date: Sat, 7 Dec 2024 22:29:46 -0800 Subject: [PATCH] Reduce Serialized Size of HybridInfo (#1485) This PR does a few things 1) Changes the codebase to assume helper_origin is a static string and does not need to be stored anywhere. It is removed from structs and outputs and simply grabbed from the place where it's defined if needed. 2) Additionally removes DOMAIN from serialized HybridInfo bytes. 3) Adds a second "to_bytes" method for HybridInfo structs. While we don't need helper_origin and DOMAIN to deserialize HybridInfo, they are needed when we're actually encrypting/decrypting. For those purposes, a new to_enc_bytes() function is added. --- ipa-core/src/cli/crypto/hybrid_decrypt.rs | 9 +- ipa-core/src/cli/crypto/hybrid_encrypt.rs | 4 - ipa-core/src/cli/csv.rs | 9 +- ipa-core/src/cli/playbook/input.rs | 10 +- ipa-core/src/protocol/hybrid/agg.rs | 15 -- ipa-core/src/protocol/hybrid/oprf.rs | 6 - ipa-core/src/report/hybrid.rs | 90 +++----- ipa-core/src/report/hybrid_info.rs | 202 +++++++----------- ipa-core/src/test_fixture/hybrid.rs | 22 +- ipa-core/src/test_fixture/hybrid_event_gen.rs | 2 - 10 files changed, 113 insertions(+), 256 deletions(-) diff --git a/ipa-core/src/cli/crypto/hybrid_decrypt.rs b/ipa-core/src/cli/crypto/hybrid_decrypt.rs index ed5747384..be705e062 100644 --- a/ipa-core/src/cli/crypto/hybrid_decrypt.rs +++ b/ipa-core/src/cli/crypto/hybrid_decrypt.rs @@ -126,12 +126,8 @@ impl HybridDecryptArgs { .reconstruct() .as_u128(); let key_id = impression_report1.info.key_id; - let helper_origin = impression_report1.info.helper_origin; - writeln!( - writer, - "i,{match_key},{breakdown_key},{key_id},{helper_origin}" - )?; + writeln!(writer, "i,{match_key},{breakdown_key},{key_id}")?; } ( HybridReport::Conversion(conversion_report1), @@ -154,12 +150,11 @@ impl HybridDecryptArgs { .reconstruct() .as_u128(); let key_id = conversion_report1.info.key_id; - let helper_origin = 
conversion_report1.info.helper_origin; let conversion_site_domain = conversion_report1.info.conversion_site_domain; let timestamp = conversion_report1.info.timestamp; let epsilon = conversion_report1.info.epsilon; let sensitivity = conversion_report1.info.sensitivity; - writeln!(writer, "c,{match_key},{value},{key_id},{helper_origin},{conversion_site_domain},{timestamp},{epsilon},{sensitivity}")?; + writeln!(writer, "c,{match_key},{value},{key_id},{conversion_site_domain},{timestamp},{epsilon},{sensitivity}")?; } _ => { panic!("Reports are not all the same type"); diff --git a/ipa-core/src/cli/crypto/hybrid_encrypt.rs b/ipa-core/src/cli/crypto/hybrid_encrypt.rs index e7d903e20..818ae8c2f 100644 --- a/ipa-core/src/cli/crypto/hybrid_encrypt.rs +++ b/ipa-core/src/cli/crypto/hybrid_encrypt.rs @@ -300,14 +300,12 @@ mod tests { #[tokio::test] async fn try_encrypting_something() { - let helper_origin = "HELPER_ORIGIN".to_string(); let conversion_site_domain = "meta.com".to_string(); let records = vec![ TestHybridRecord::TestConversion { match_key: 12345, value: 2, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 100, epsilon: 0.0, @@ -317,7 +315,6 @@ mod tests { match_key: 12345, value: 5, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 101, epsilon: 0.0, @@ -327,7 +324,6 @@ mod tests { match_key: 23456, breakdown_key: 4, key_id: 0, - helper_origin: helper_origin.clone(), }, ]; let mut input_file = NamedTempFile::new().unwrap(); diff --git a/ipa-core/src/cli/csv.rs b/ipa-core/src/cli/csv.rs index 9ea132173..37190ed36 100644 --- a/ipa-core/src/cli/csv.rs +++ b/ipa-core/src/cli/csv.rs @@ -29,24 +29,19 @@ impl Serializer for crate::test_fixture::hybrid::TestHybridRecord { match_key, breakdown_key, key_id, - helper_origin, } => { - write!( - buf, - "i,{match_key},{breakdown_key},{key_id},{helper_origin}" - )?; + write!(buf, 
"i,{match_key},{breakdown_key},{key_id}")?; } crate::test_fixture::hybrid::TestHybridRecord::TestConversion { match_key, value, key_id, - helper_origin, conversion_site_domain, timestamp, epsilon, sensitivity, } => { - write!(buf, "c,{match_key},{value},{key_id},{helper_origin},{conversion_site_domain},{timestamp},{epsilon},{sensitivity}")?; + write!(buf, "c,{match_key},{value},{key_id},{conversion_site_domain},{timestamp},{epsilon},{sensitivity}")?; } } diff --git a/ipa-core/src/cli/playbook/input.rs b/ipa-core/src/cli/playbook/input.rs index 85bdac631..ed9eddce5 100644 --- a/ipa-core/src/cli/playbook/input.rs +++ b/ipa-core/src/cli/playbook/input.rs @@ -62,9 +62,7 @@ impl InputItem for TestHybridRecord { let event_type = s.chars().nth(0).unwrap(); match event_type { 'i' => { - if let [_, match_key, number, key_id, helper_origin] = - s.splitn(5, ',').collect::>()[..] - { + if let [_, match_key, number, key_id] = s.splitn(4, ',').collect::>()[..] { let match_key: u64 = match_key .parse() .unwrap_or_else(|e| panic!("Expected a u64, got {match_key}: {e}")); @@ -80,7 +78,6 @@ impl InputItem for TestHybridRecord { match_key, breakdown_key: number, key_id, - helper_origin: helper_origin.to_string(), } } else { panic!("{s} is not a valid {}", type_name::()) @@ -88,8 +85,8 @@ impl InputItem for TestHybridRecord { } 'c' => { - if let [_, match_key, number, key_id, helper_origin, conversion_site_domain, timestamp, epsilon, sensitivity] = - s.splitn(9, ',').collect::>()[..] + if let [_, match_key, number, key_id, conversion_site_domain, timestamp, epsilon, sensitivity] = + s.splitn(8, ',').collect::>()[..] 
{ let match_key: u64 = match_key .parse() @@ -118,7 +115,6 @@ impl InputItem for TestHybridRecord { match_key, value: number, key_id, - helper_origin: helper_origin.to_string(), conversion_site_domain: conversion_site_domain.to_string(), timestamp, epsilon, diff --git a/ipa-core/src/protocol/hybrid/agg.rs b/ipa-core/src/protocol/hybrid/agg.rs index 9b2113991..cfaef9fe1 100644 --- a/ipa-core/src/protocol/hybrid/agg.rs +++ b/ipa-core/src/protocol/hybrid/agg.rs @@ -196,20 +196,17 @@ pub mod test { #[allow(clippy::too_many_lines)] fn get_records() -> Vec { - let helper_origin = "HELPER_ORIGIN".to_string(); let conversion_site_domain = "meta.com".to_string(); let shard1_records = [ TestHybridRecord::TestImpression { match_key: SHARD1_MKS[0], breakdown_key: 45, key_id: 0, - helper_origin: helper_origin.clone(), }, TestHybridRecord::TestConversion { match_key: SHARD1_MKS[1], value: 1, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 102, epsilon: 0.0, @@ -219,7 +216,6 @@ pub mod test { match_key: SHARD1_MKS[2], value: 3, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 103, epsilon: 0.0, @@ -229,7 +225,6 @@ pub mod test { match_key: SHARD1_MKS[3], value: 4, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 104, epsilon: 0.0, @@ -239,19 +234,16 @@ pub mod test { match_key: SHARD1_MKS[4], breakdown_key: 1, key_id: 0, - helper_origin: helper_origin.clone(), }, // duplicated impression with same match_key TestHybridRecord::TestImpression { match_key: SHARD1_MKS[4], breakdown_key: 2, key_id: 0, - helper_origin: helper_origin.clone(), }, // duplicated impression with same match_key TestHybridRecord::TestConversion { match_key: SHARD1_MKS[5], value: 7, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 105, 
epsilon: 0.0, @@ -263,13 +255,11 @@ pub mod test { match_key: SHARD2_MKS[0], breakdown_key: 56, key_id: 0, - helper_origin: helper_origin.clone(), }, TestHybridRecord::TestConversion { match_key: SHARD2_MKS[1], value: 2, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 100, epsilon: 0.0, @@ -279,13 +269,11 @@ pub mod test { match_key: SHARD2_MKS[2], breakdown_key: 78, key_id: 0, - helper_origin: helper_origin.clone(), }, // NOT attributed TestHybridRecord::TestConversion { match_key: SHARD2_MKS[3], value: 5, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 101, epsilon: 0.0, @@ -295,13 +283,11 @@ pub mod test { match_key: SHARD2_MKS[4], breakdown_key: 90, key_id: 0, - helper_origin: helper_origin.clone(), }, // attributed twice, removed TestHybridRecord::TestConversion { match_key: SHARD2_MKS[5], value: 6, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 102, epsilon: 0.0, @@ -311,7 +297,6 @@ pub mod test { match_key: SHARD2_MKS[6], value: 7, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 103, epsilon: 0.0, diff --git a/ipa-core/src/protocol/hybrid/oprf.rs b/ipa-core/src/protocol/hybrid/oprf.rs index 7ededb91e..69a6d593e 100644 --- a/ipa-core/src/protocol/hybrid/oprf.rs +++ b/ipa-core/src/protocol/hybrid/oprf.rs @@ -231,19 +231,16 @@ mod test { match_key: 12345, breakdown_key: 2, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), }, TestHybridRecord::TestImpression { match_key: 68362, breakdown_key: 1, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), }, TestHybridRecord::TestConversion { match_key: 12345, value: 5, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), conversion_site_domain: "meta.com".to_string(), timestamp: 100, epsilon: 0.0, @@ -253,7 +250,6 @@ mod test { 
match_key: 68362, value: 2, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), conversion_site_domain: "meta.com".to_string(), timestamp: 102, epsilon: 0.0, @@ -263,13 +259,11 @@ mod test { match_key: 68362, breakdown_key: 1, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), }, TestHybridRecord::TestConversion { match_key: 68362, value: 7, key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), conversion_site_domain: "meta.com".to_string(), timestamp: 104, epsilon: 0.0, diff --git a/ipa-core/src/report/hybrid.rs b/ipa-core/src/report/hybrid.rs index eeada7823..bcaca20aa 100644 --- a/ipa-core/src/report/hybrid.rs +++ b/ipa-core/src/report/hybrid.rs @@ -59,7 +59,7 @@ use crate::{ // TODO(679): This needs to come from configuration. #[allow(dead_code)] -static HELPER_ORIGIN: &str = "github.com/private-attribution"; +pub static HELPER_ORIGIN: &str = "github.com/private-attribution"; pub type KeyIdentifier = u8; pub const DEFAULT_KEY_ID: KeyIdentifier = 0; @@ -185,8 +185,7 @@ where /// # Panics /// If report length does not fit in `u16`. pub fn encrypted_len(&self) -> u16 { - // Todo: get this more efficiently - self.ciphertext_len() + u16::try_from(self.info.to_bytes().len()).unwrap() + self.ciphertext_len() + u16::try_from(self.info.byte_len()).unwrap() } /// # Errors @@ -233,19 +232,20 @@ where .serialize(GenericArray::from_mut_slice(&mut plaintext_btt[..])); let pk = key_registry.public_key(key_id).ok_or(CryptError::NoSuchKey(key_id))?; + let info_enc_bytes = self.info.to_enc_bytes(); let info_bytes = self.info.to_bytes(); let (encap_key_mk, ciphertext_mk, tag_mk) = seal_in_place( pk, plaintext_mk.as_mut(), - &info_bytes, + &info_enc_bytes, rng, )?; let (encap_key_btt, ciphertext_btt, tag_btt) = seal_in_place( pk, plaintext_btt.as_mut(), - &info_bytes, + &info_enc_bytes, rng, )?; @@ -333,8 +333,7 @@ where /// # Panics /// If report length does not fit in `u16`. 
pub fn encrypted_len(&self) -> u16 { - // Todo: get this more efficiently - self.ciphertext_len() + u16::try_from(self.info.to_bytes().len()).unwrap() + self.ciphertext_len() + u16::try_from(self.info.byte_len()).unwrap() } /// # Errors @@ -382,19 +381,20 @@ where .serialize(GenericArray::from_mut_slice(&mut plaintext_btt[..])); let pk = key_registry.public_key(key_id).ok_or(CryptError::NoSuchKey(key_id))?; + let info_enc_bytes = self.info.to_enc_bytes(); let info_bytes = self.info.to_bytes(); let (encap_key_mk, ciphertext_mk, tag_mk) = seal_in_place( pk, plaintext_mk.as_mut(), - &info_bytes, + &info_enc_bytes, rng, )?; let (encap_key_btt, ciphertext_btt, tag_btt) = seal_in_place( pk, plaintext_btt.as_mut(), - &info_bytes, + &info_enc_bytes, rng, )?; @@ -590,11 +590,13 @@ where HybridImpressionInfo::from_bytes(&self.data[Self::INFO_OFFSET..]).map_err(|e| { InvalidHybridReportError::DeserializationError("HybridImpressionInfo", e.into()) })?; - let plaintext_mk = open_in_place(sk, self.encap_key_mk(), &mut ct_mk, &info.to_bytes())?; + let info_enc_bytes = info.to_enc_bytes(); + + let plaintext_mk = open_in_place(sk, self.encap_key_mk(), &mut ct_mk, &info_enc_bytes)?; let mut ct_btt: GenericArray> = GenericArray::from_slice(self.btt_ciphertext()).clone(); - let plaintext_btt = open_in_place(sk, self.encap_key_btt(), &mut ct_btt, &info.to_bytes())?; + let plaintext_btt = open_in_place(sk, self.encap_key_btt(), &mut ct_btt, &info_enc_bytes)?; Ok(HybridImpressionReport:: { match_key: Replicated::::deserialize_infallible(GenericArray::from_slice( @@ -693,11 +695,12 @@ where HybridConversionInfo::from_bytes(&self.data[Self::INFO_OFFSET..]).map_err(|e| { InvalidHybridReportError::DeserializationError("HybridConversionInfo", e.into()) })?; + let info_enc_bytes = info.to_enc_bytes(); - let plaintext_mk = open_in_place(sk, self.encap_key_mk(), &mut ct_mk, &info.to_bytes())?; + let plaintext_mk = open_in_place(sk, self.encap_key_mk(), &mut ct_mk, &info_enc_bytes)?; let mut 
ct_btt: GenericArray> = GenericArray::from_slice(self.btt_ciphertext()).clone(); - let plaintext_btt = open_in_place(sk, self.encap_key_btt(), &mut ct_btt, &info.to_bytes())?; + let plaintext_btt = open_in_place(sk, self.encap_key_btt(), &mut ct_btt, &info_enc_bytes)?; Ok(HybridConversionReport:: { match_key: Replicated::::deserialize_infallible(GenericArray::from_slice( @@ -1236,7 +1239,6 @@ mod test { EncryptedHybridImpressionReport, EncryptedHybridReport, GenericArray, HybridConversionReport, HybridImpressionReport, HybridReport, IndistinguishableHybridReport, PrfHybridReport, UniqueTag, UniqueTagValidator, - HELPER_ORIGIN, }; use crate::{ error::Error, @@ -1246,7 +1248,7 @@ mod test { }, hpke::{KeyPair, KeyRegistry}, report::{ - hybrid::{EncryptedHybridConversionReport, HybridEventType, NonAsciiStringError}, + hybrid::{EncryptedHybridConversionReport, HybridEventType}, hybrid_info::{HybridConversionInfo, HybridImpressionInfo}, }, secret_sharing::replicated::{ @@ -1265,7 +1267,7 @@ mod test { HybridReport::Impression(HybridImpressionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), breakdown_key: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridImpressionInfo::new(0, "HelperOrigin").unwrap(), + info: HybridImpressionInfo::new(0), }) } HybridEventType::Conversion => { @@ -1274,7 +1276,6 @@ mod test { value: AdditiveShare::new(rng.gen(), rng.gen()), info: HybridConversionInfo::new( 0, - "HelperOrigin", "https://www.example2.com", rng.gen(), 0.0, @@ -1307,15 +1308,8 @@ mod test { let conversion_report = HybridConversionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), value: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridConversionInfo::new( - 0, - "HelperOrigin", - "https://www.example2.com", - 1_234_567, - 0.0, - 0.0, - ) - .unwrap(), + info: HybridConversionInfo::new(0, "https://www.example2.com", 1_234_567, 0.0, 0.0) + .unwrap(), }; let indistinguishable_report: IndistinguishableHybridReport = 
conversion_report.clone().into(); @@ -1345,7 +1339,7 @@ mod test { let impression_report = HybridImpressionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), breakdown_key: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridImpressionInfo::new(0, "HelperOrigin").unwrap(), + info: HybridImpressionInfo::new(0), }; let indistinguishable_report: IndistinguishableHybridReport = impression_report.clone().into(); @@ -1395,7 +1389,7 @@ mod test { let hybrid_impression_report = HybridImpressionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), breakdown_key: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridImpressionInfo::new(0, "HelperOrigin").unwrap(), + info: HybridImpressionInfo::new(0), }; let mut hybrid_impression_report_bytes = Vec::with_capacity(HybridImpressionReport::::serialized_len()); @@ -1414,15 +1408,8 @@ mod test { let hybrid_conversion_report = HybridConversionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), value: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridConversionInfo::new( - 0, - "HelperOrigin", - "https://www.example2.com", - 1_234_567, - 0.0, - 0.0, - ) - .unwrap(), + info: HybridConversionInfo::new(0, "https://www.example2.com", 1_234_567, 0.0, 0.0) + .unwrap(), }; let mut hybrid_conversion_report_bytes = Vec::with_capacity(HybridImpressionReport::::serialized_len()); @@ -1444,7 +1431,7 @@ mod test { let hybrid_impression_report = HybridImpressionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), breakdown_key: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridImpressionInfo::new(key_id, HELPER_ORIGIN).unwrap(), + info: HybridImpressionInfo::new(key_id), }; let enc_report_bytes = hybrid_impression_report @@ -1467,15 +1454,7 @@ mod test { let hybrid_conversion_report = HybridConversionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), value: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridConversionInfo::new( - 0, - HELPER_ORIGIN, - "meta.com", - 
1_729_707_432, - 5.0, - 1.1, - ) - .unwrap(), + info: HybridConversionInfo::new(0, "meta.com", 1_729_707_432, 5.0, 1.1).unwrap(), }; let key_registry = KeyRegistry::::random(1, &mut rng); @@ -1522,15 +1501,7 @@ mod test { let hybrid_conversion_report = HybridConversionReport:: { match_key: AdditiveShare::new(rng.gen(), rng.gen()), value: AdditiveShare::new(rng.gen(), rng.gen()), - info: HybridConversionInfo::new( - 0, - "HELPER_ORIGIN", - "meta.com", - 1_729_707_432, - 5.0, - 1.1, - ) - .unwrap(), + info: HybridConversionInfo::new(0, "meta.com", 1_729_707_432, 5.0, 1.1).unwrap(), }; let key_registry = KeyRegistry::::random(1, &mut rng); @@ -1574,13 +1545,6 @@ mod test { }); } - #[test] - fn non_ascii_string() { - let non_ascii_string = "☃️☃️☃️"; - let err = HybridImpressionInfo::new(0, non_ascii_string).unwrap_err(); - assert!(matches!(err, NonAsciiStringError(_))); - } - #[test] fn serde() { run_random(|mut rng| async move { diff --git a/ipa-core/src/report/hybrid_info.rs b/ipa-core/src/report/hybrid_info.rs index 1fdee599d..31da9f68e 100644 --- a/ipa-core/src/report/hybrid_info.rs +++ b/ipa-core/src/report/hybrid_info.rs @@ -1,5 +1,5 @@ use crate::report::{ - hybrid::{InvalidHybridReportError, NonAsciiStringError}, + hybrid::{InvalidHybridReportError, NonAsciiStringError, HELPER_ORIGIN}, KeyIdentifier, }; @@ -8,41 +8,44 @@ const DOMAIN: &str = "private-attribution"; #[derive(Clone, Debug, PartialEq)] pub struct HybridImpressionInfo { pub key_id: KeyIdentifier, - pub helper_origin: String, } impl HybridImpressionInfo { /// Creates a new instance. - /// - /// ## Errors - /// if helper or site origin is not a valid ASCII string. - pub fn new(key_id: KeyIdentifier, helper_origin: &str) -> Result { - // If the types of errors returned from this function change, then the validation in - // `EncryptedReport::from_bytes` may need to change as well. 
- if !helper_origin.is_ascii() { - return Err(helper_origin.into()); - } + #[must_use] + pub fn new(key_id: KeyIdentifier) -> Self { + Self { key_id } + } - Ok(Self { - key_id, - helper_origin: helper_origin.to_string(), - }) + #[must_use] + pub fn byte_len(&self) -> usize { + let out_len = std::mem::size_of_val(&self.key_id); + debug_assert_eq!(out_len, self.to_bytes().len(), "Serialization length estimation is incorrect and leads to extra allocation or wasted memory"); + out_len } - // Converts this instance into an owned byte slice that can further be used to create HPKE - // sender or receiver context. + // Converts this instance into an owned byte slice. DO NOT USE AS INPUT TO HPKE + // This is only for serialization and deserialization. #[must_use] pub fn to_bytes(&self) -> Box<[u8]> { - let info_len = DOMAIN.len() - + self.helper_origin.len() - + 2 // delimiters(?) - + std::mem::size_of_val(&self.key_id); + let info_len = std::mem::size_of_val(&self.key_id); + let mut r = Vec::with_capacity(info_len); + + r.push(self.key_id); + + debug_assert_eq!(r.len(), info_len, "Serialization length estimation is incorrect and leads to extra allocation or wasted memory"); + + r.into_boxed_slice() + } + + #[must_use] + // Converts this instance into an owned byte slice that can further be used to create HPKE sender or receiver context. + pub fn to_enc_bytes(&self) -> Box<[u8]> { + let info_len = DOMAIN.len() + HELPER_ORIGIN.len() + std::mem::size_of_val(&self.key_id); let mut r = Vec::with_capacity(info_len); r.extend_from_slice(DOMAIN.as_bytes()); - r.push(0); - r.extend_from_slice(self.helper_origin.as_bytes()); - r.push(0); + r.extend_from_slice(HELPER_ORIGIN.as_bytes()); r.push(self.key_id); @@ -56,45 +59,14 @@ impl HybridImpressionInfo { /// ## Panics /// If not enough delimiters are found in the input bytes. 
pub fn from_bytes(bytes: &[u8]) -> Result { - let mut pos = 0; - - let domain = std::str::from_utf8(&bytes[pos..pos + DOMAIN.len()]).map_err(|e| { - InvalidHybridReportError::DeserializationError("HybridImpressionInfo: domain", e.into()) - })?; - assert!( - domain == DOMAIN, - "HPKE Info domain does not match hardcoded domain" - ); - pos += DOMAIN.len() + 1; - - let delimiter_pos = bytes[pos..] - .iter() - .position(|&b| b == 0) - .unwrap_or_else(|| panic!("not enough delimiters for HybridImpressionInfo")); - let helper_origin = - String::from_utf8(bytes[pos..pos + delimiter_pos].to_vec()).map_err(|e| { - InvalidHybridReportError::DeserializationError( - "HybridImpressionInfo: helper_origin", - e.into(), - ) - })?; - pos += delimiter_pos; - debug_assert!(pos + 2 == bytes.len(), "{}", format!("bytes for HybridImpressionInfo::from_bytes has incorrect length. Expected: {}, Actual: {}", pos + 2, bytes.len()).to_string()); - pos += 1; - - let key_id = bytes[pos]; - - Ok(Self { - key_id, - helper_origin, - }) + let key_id = bytes[0]; + Ok(Self { key_id }) } } #[derive(Clone, Debug, PartialEq)] pub struct HybridConversionInfo { pub key_id: KeyIdentifier, - pub helper_origin: String, pub conversion_site_domain: String, pub timestamp: u64, pub epsilon: f64, @@ -105,28 +77,20 @@ impl HybridConversionInfo { /// Creates a new instance. /// /// ## Errors - /// if helper or site origin is not a valid ASCII string. + /// if `site_domain` is not a valid ASCII string. pub fn new( key_id: KeyIdentifier, - helper_origin: &str, conversion_site_domain: &str, timestamp: u64, epsilon: f64, sensitivity: f64, ) -> Result { - // If the types of errors returned from this function change, then the validation in - // `EncryptedReport::from_bytes` may need to change as well. 
- if !helper_origin.is_ascii() { - return Err(helper_origin.into()); - } - if !conversion_site_domain.is_ascii() { return Err(conversion_site_domain.into()); } Ok(Self { key_id, - helper_origin: helper_origin.to_string(), conversion_site_domain: conversion_site_domain.to_string(), timestamp, epsilon, @@ -134,14 +98,50 @@ impl HybridConversionInfo { }) } + #[must_use] + pub fn byte_len(&self) -> usize { + let out_len = std::mem::size_of_val(&self.key_id) + + 1 // delimiter + + self.conversion_site_domain.len() + + std::mem::size_of_val(&self.timestamp) + + std::mem::size_of_val(&self.epsilon) + + std::mem::size_of_val(&self.sensitivity); + debug_assert_eq!(out_len, self.to_bytes().len(), "Serialization length estimation is incorrect and leads to extra allocation or wasted memory"); + out_len + } + + // Converts this instance into an owned byte slice. DO NOT USE AS INPUT TO HPKE + // This is only for serialization and deserialization. + #[must_use] + pub fn to_bytes(&self) -> Box<[u8]> { + let info_len = self.conversion_site_domain.len() + + 1 // delimiter + + std::mem::size_of_val(&self.key_id) + + std::mem::size_of_val(&self.timestamp) + + std::mem::size_of_val(&self.epsilon) + + std::mem::size_of_val(&self.sensitivity); + let mut r = Vec::with_capacity(info_len); + + r.extend_from_slice(self.conversion_site_domain.as_bytes()); + r.push(0); + + r.push(self.key_id); + r.extend_from_slice(&self.timestamp.to_be_bytes()); + r.extend_from_slice(&self.epsilon.to_be_bytes()); + r.extend_from_slice(&self.sensitivity.to_be_bytes()); + + debug_assert_eq!(r.len(), info_len, "Serilization length estimation is incorrect and leads to extra allocation or wasted memory"); + + r.into_boxed_slice() + } + // Converts this instance into an owned byte slice that can further be used to create HPKE // sender or receiver context. 
#[must_use] - pub fn to_bytes(&self) -> Box<[u8]> { + pub fn to_enc_bytes(&self) -> Box<[u8]> { let info_len = DOMAIN.len() - + self.helper_origin.len() + + HELPER_ORIGIN.len() + self.conversion_site_domain.len() - + 3 // delimiters + std::mem::size_of_val(&self.key_id) + std::mem::size_of_val(&self.timestamp) + std::mem::size_of_val(&self.epsilon) @@ -149,11 +149,8 @@ impl HybridConversionInfo { let mut r = Vec::with_capacity(info_len); r.extend_from_slice(DOMAIN.as_bytes()); - r.push(0); - r.extend_from_slice(self.helper_origin.as_bytes()); - r.push(0); + r.extend_from_slice(HELPER_ORIGIN.as_bytes()); r.extend_from_slice(self.conversion_site_domain.as_bytes()); - r.push(0); r.push(self.key_id); r.extend_from_slice(&self.timestamp.to_be_bytes()); @@ -171,30 +168,7 @@ impl HybridConversionInfo { /// If not enough delimiters are found in the input bytes. pub fn from_bytes(bytes: &[u8]) -> Result { let mut pos = 0; - - let domain = std::str::from_utf8(&bytes[pos..pos + DOMAIN.len()]).map_err(|e| { - InvalidHybridReportError::DeserializationError("HybridConversionInfo: domain", e.into()) - })?; - assert!( - domain == DOMAIN, - "HPKE Info domain does not match hardcoded domain" - ); - pos += DOMAIN.len() + 1; - - let mut delimiter_pos = bytes[pos..] - .iter() - .position(|&b| b == 0) - .unwrap_or_else(|| panic!("not enough delimiters for HybridConversionInfo")); - let helper_origin = - String::from_utf8(bytes[pos..pos + delimiter_pos].to_vec()).map_err(|e| { - InvalidHybridReportError::DeserializationError( - "HybridConversionInfo: helper_origin", - e.into(), - ) - })?; - pos += delimiter_pos + 1; - - delimiter_pos = bytes[pos..] + let delimiter_pos = bytes[pos..] .iter() .position(|&b| b == 0) .unwrap_or_else(|| panic!("not enough delimiters for HybridConversionInfo")); @@ -206,7 +180,7 @@ impl HybridConversionInfo { ) })?; pos += delimiter_pos + 1; - debug_assert!(pos + 25 == bytes.len(), "{}", format!("bytes for HybridConversionInfo::from_bytes has incorrect length. 
Expected: {}, Actual: {}", pos + 25, bytes.len()).to_string()); + debug_assert!(pos + 3*8 + 1 == bytes.len(), "{}", format!("bytes for HybridConversionInfo::from_bytes has incorrect length. Expected: {}, Actual: {}", pos + 3*8 + 1, bytes.len()).to_string()); let key_id = bytes[pos]; pos += 1; @@ -218,7 +192,6 @@ impl HybridConversionInfo { Ok(Self { key_id, - helper_origin, conversion_site_domain, timestamp, epsilon, @@ -236,19 +209,17 @@ pub struct HybridInfo { impl HybridInfo { /// Creates a new instance. /// ## Errors - /// if helper or site origin is not a valid ASCII string. + /// if `site_domain` is not a valid ASCII string. pub fn new( key_id: KeyIdentifier, - helper_origin: &str, conversion_site_domain: &str, timestamp: u64, epsilon: f64, sensitivity: f64, ) -> Result { - let impression = HybridImpressionInfo::new(key_id, helper_origin)?; + let impression = HybridImpressionInfo::new(key_id); let conversion = HybridConversionInfo::new( key_id, - helper_origin, conversion_site_domain, timestamp, epsilon, @@ -271,7 +242,6 @@ impl HybridInfo { let conversion = HybridConversionInfo::from_bytes(bytes)?; let impression = HybridImpressionInfo { key_id: conversion.key_id, - helper_origin: conversion.helper_origin.clone(), }; Ok(Self { impression, @@ -286,7 +256,7 @@ mod test { #[test] fn test_hybrid_impression_serialization() { - let info = HybridImpressionInfo::new(0, "https://www.example.com").unwrap(); + let info = HybridImpressionInfo::new(0); let bytes = info.to_bytes(); let info2 = HybridImpressionInfo::from_bytes(&bytes).unwrap(); assert_eq!(info.to_bytes(), info2.to_bytes()); @@ -295,19 +265,11 @@ mod test { #[test] #[allow(clippy::float_cmp)] fn test_hybrid_conversion_serialization() { - let info = HybridConversionInfo::new( - 0, - "https://www.example.com", - "https://www.example2.com", - 1_234_567, - 1.151, - 0.95, - ) - .unwrap(); + let info = HybridConversionInfo::new(0, "https://www.example2.com", 1_234_567, 1.151, 0.95) + .unwrap(); let bytes = 
info.to_bytes(); let info2 = HybridConversionInfo::from_bytes(&bytes).unwrap(); assert_eq!(info2.key_id, 0); - assert_eq!(info2.helper_origin, "https://www.example.com"); assert_eq!(info2.conversion_site_domain, "https://www.example2.com"); assert_eq!(info2.timestamp, 1_234_567); assert_eq!(info2.epsilon, 1.151); @@ -317,15 +279,7 @@ mod test { #[test] fn test_hybrid_info_serialization() { - let info = HybridInfo::new( - 0, - "https://www.example.com", - "https://www.example2.com", - 1_234_567, - 1.151, - 0.95, - ) - .unwrap(); + let info = HybridInfo::new(0, "https://www.example2.com", 1_234_567, 1.151, 0.95).unwrap(); let bytes = info.to_bytes(); let info2 = HybridInfo::from_bytes(&bytes).unwrap(); assert_eq!(info.to_bytes(), info2.to_bytes()); diff --git a/ipa-core/src/test_fixture/hybrid.rs b/ipa-core/src/test_fixture/hybrid.rs index 80f5abc07..ba1b19e2c 100644 --- a/ipa-core/src/test_fixture/hybrid.rs +++ b/ipa-core/src/test_fixture/hybrid.rs @@ -23,13 +23,11 @@ pub enum TestHybridRecord { match_key: u64, breakdown_key: u32, key_id: KeyIdentifier, - helper_origin: String, }, TestConversion { match_key: u64, value: u32, key_id: KeyIdentifier, - helper_origin: String, conversion_site_domain: String, timestamp: u64, epsilon: f64, @@ -136,7 +134,6 @@ where match_key, breakdown_key, key_id, - helper_origin, } => { let ba_match_key = BA64::try_from(u128::from(match_key)) .unwrap() @@ -149,7 +146,7 @@ where HybridReport::Impression::(HybridImpressionReport { match_key: match_key_share, breakdown_key: breakdown_key_share, - info: HybridImpressionInfo::new(key_id, &helper_origin).unwrap(), + info: HybridImpressionInfo::new(key_id), }) }) .collect::>() @@ -160,7 +157,6 @@ where match_key, value, key_id, - helper_origin, conversion_site_domain, timestamp, epsilon, @@ -177,7 +173,6 @@ where value: value_share, info: HybridConversionInfo::new( key_id, - &helper_origin, &conversion_site_domain, timestamp, epsilon, @@ -265,14 +260,12 @@ pub fn hybrid_in_the_clear(input_rows: 
&[TestHybridRecord], max_breakdown: usize #[must_use] #[allow(clippy::too_many_lines)] pub fn build_hybrid_records_and_expectation() -> (Vec, Vec) { - let helper_origin = "HELPER_ORIGIN".to_string(); let conversion_site_domain = "meta.com".to_string(); let test_hybrid_records = vec![ TestHybridRecord::TestConversion { match_key: 12345, value: 2, key_id: 0, - helper_origin: helper_origin.clone(), conversion_site_domain: conversion_site_domain.clone(), timestamp: 100, epsilon: 0.0, @@ -282,7 +275,6 @@ pub fn build_hybrid_records_and_expectation() -> (Vec, Vec (Vec, Vec (Vec, Vec (Vec, Vec (Vec, Vec (Vec, Vec (Vec, Vec EventGenerator { .rng .gen_range(1..self.config.max_conversion_value.get()), key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), conversion_site_domain: "meta.com".to_string(), timestamp: self.rng.gen_range(0..1000), epsilon: 0.0, @@ -125,7 +124,6 @@ impl EventGenerator { match_key, breakdown_key: self.rng.gen_range(0..self.config.max_breakdown_key.get()), key_id: 0, - helper_origin: "HELPER_ORIGIN".to_string(), } } }