Skip to content

Commit

Permalink
Allow event producer to specify which values to anonymize (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
hpeebles authored Mar 15, 2024
1 parent dcc6b1e commit 7f3b93f
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 112 deletions.
11 changes: 3 additions & 8 deletions rs/canister/api/can.did
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
type AnonymizationInitConfig = record {
users : opt bool;
exclusions : opt vec text;
sources : opt bool;
};
type Anonymizable = variant { Anonymize : text; Public : text };
type EventsArgs = record { start : nat64; length : nat64 };
type EventsResponse = record {
events : vec IndexedEvent;
latest_event_index : opt nat64;
};
type IdempotentEvent = record {
source : opt text;
source : opt Anonymizable;
name : text;
user : opt text;
user : opt Anonymizable;
timestamp : nat64;
payload : vec nat8;
idempotency_key : nat;
Expand All @@ -26,7 +22,6 @@ type IndexedEvent = record {
};
type InitArgs = record {
push_events_whitelist : vec principal;
anonymization_config : opt AnonymizationInitConfig;
read_events_whitelist : vec principal;
};
type PushEventsArgs = record { events : vec IdempotentEvent };
Expand Down
20 changes: 18 additions & 2 deletions rs/canister/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ pub struct IdempotentEvent {
pub idempotency_key: u128,
pub name: String,
pub timestamp: TimestampMillis,
pub user: Option<String>,
pub source: Option<String>,
pub user: Option<Anonymizable>,
pub source: Option<Anonymizable>,
#[serde(with = "serde_bytes")]
pub payload: Vec<u8>,
}
Expand All @@ -32,3 +32,19 @@ pub struct IndexedEvent {
#[serde(with = "serde_bytes")]
pub payload: Vec<u8>,
}

/// A string value tagged by the event producer with whether it must be
/// anonymized before it is stored.
///
/// Used for the optional `user` and `source` fields of `IdempotentEvent`.
#[derive(CandidType, Serialize, Deserialize, Clone, Debug)]
pub enum Anonymizable {
/// The wrapped value is kept exactly as provided.
Public(String),
/// The wrapped value is to be anonymized; the canister's event model
/// replaces it with a salted hash before storing it.
Anonymize(String),
}

impl Anonymizable {
    /// Wraps `value` in the variant selected by the `anonymize` flag.
    ///
    /// Returns [`Anonymizable::Anonymize`] when `anonymize` is `true` and
    /// [`Anonymizable::Public`] otherwise. The string itself is moved into the
    /// chosen variant unchanged.
    pub fn new(value: String, anonymize: bool) -> Anonymizable {
        // Both tuple-variant constructors coerce to the same function pointer
        // type, so pick the constructor first and apply it once.
        let wrap: fn(String) -> Anonymizable = if anonymize {
            Anonymizable::Anonymize
        } else {
            Anonymizable::Public
        };
        wrap(value)
    }
}
8 changes: 0 additions & 8 deletions rs/canister/api/src/lifecycle/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,4 @@ use serde::Serialize;
pub struct InitArgs {
pub push_events_whitelist: Vec<Principal>,
pub read_events_whitelist: Vec<Principal>,
pub anonymization_config: Option<AnonymizationInitConfig>,
}

#[derive(CandidType, Serialize, Deserialize, Clone, Debug, Default)]
pub struct AnonymizationInitConfig {
pub users: Option<bool>,
pub sources: Option<bool>,
pub exclusions: Option<Vec<String>>,
}
1 change: 0 additions & 1 deletion rs/canister/impl/src/lifecycle/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ fn init(args: InitArgs) {
state::init(State::new(
args.push_events_whitelist.into_iter().collect(),
args.read_events_whitelist.into_iter().collect(),
args.anonymization_config.unwrap_or_default(),
));

ic_cdk_timers::set_timer(Duration::ZERO, || {
Expand Down
42 changes: 37 additions & 5 deletions rs/canister/impl/src/model/events.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use crate::memory::{get_events_data_memory, get_events_index_memory, Memory};
use crate::model::string_to_num_map::StringToNumMap;
use candid::Deserialize;
use event_store_canister::{IdempotentEvent, IndexedEvent, TimestampMillis};
use event_store_canister::{Anonymizable, IdempotentEvent, IndexedEvent, TimestampMillis};
use ic_stable_structures::storable::Bound;
use ic_stable_structures::{StableLog, Storable};
use serde::Serialize;
use sha2::Digest;
use std::borrow::Cow;
use std::fmt::Write;

pub struct Events {
events: StableLog<StorableEvent, Memory, Memory>,
Expand All @@ -22,8 +24,8 @@ impl Events {
.collect()
}

pub fn push(&mut self, event: IdempotentEvent) {
let storable = self.convert_to_storable(event, self.events.len());
pub fn push(&mut self, event: IdempotentEvent, salt: [u8; 32]) {
let storable = self.convert_to_storable(event, self.events.len(), salt);

self.events.append(&storable).unwrap();
}
Expand All @@ -34,14 +36,23 @@ impl Events {
}
}

fn convert_to_storable(&mut self, event: IdempotentEvent, index: u64) -> StorableEvent {
fn convert_to_storable(
&mut self,
event: IdempotentEvent,
index: u64,
salt: [u8; 32],
) -> StorableEvent {
StorableEvent {
index,
name: self.string_to_num_map.convert_to_num(event.name),
timestamp: event.timestamp,
user: event.user.map(|u| self.string_to_num_map.convert_to_num(u)),
user: event
.user
.map(|u| to_maybe_anonymized_string(u, salt))
.map(|u| self.string_to_num_map.convert_to_num(u)),
source: event
.source
.map(|s| to_maybe_anonymized_string(s, salt))
.map(|s| self.string_to_num_map.convert_to_num(s)),
payload: event.payload,
}
Expand Down Expand Up @@ -119,3 +130,24 @@ impl Storable for StorableEvent {
/// Returns `true` when the given slice contains no elements.
fn is_empty_slice<T>(items: &[T]) -> bool {
    items.is_empty()
}

/// Unwraps an [`Anonymizable`] into the string that should be stored.
///
/// `Public` values are returned untouched; `Anonymize` values are passed
/// through [`anonymize`] together with `salt`.
fn to_maybe_anonymized_string(value: Anonymizable, salt: [u8; 32]) -> String {
    let sensitive = match value {
        // Nothing to hide: hand the original string straight back.
        Anonymizable::Public(plain) => return plain,
        Anonymizable::Anonymize(raw) => raw,
    };
    anonymize(&sensitive, salt)
}

/// Derives a deterministic 32-character identifier from `value` and `salt`.
///
/// SHA-256 is computed over the UTF-8 bytes of `value` followed by the bytes
/// of `salt`; the first 16 bytes of the digest are returned as lowercase hex.
fn anonymize(value: &str, salt: [u8; 32]) -> String {
    let mut sha256 = sha2::Sha256::new();
    sha256.update(value.as_bytes());
    sha256.update(salt);
    let digest: [u8; 32] = sha256.finalize().into();

    // Two hex characters per byte, so 16 bytes fill the 32-character buffer exactly.
    digest[..16]
        .iter()
        .fold(String::with_capacity(32), |mut hex, byte| {
            write!(hex, "{byte:02x}").unwrap();
            hex
        })
}
68 changes: 4 additions & 64 deletions rs/canister/impl/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@ use crate::env;
use crate::model::events::Events;
use crate::model::salt::Salt;
use candid::Principal;
use event_store_canister::{
AnonymizationInitConfig, IdempotentEvent, TimestampMillis, WhitelistedPrincipals,
};
use event_store_canister::{IdempotentEvent, TimestampMillis, WhitelistedPrincipals};
use event_store_utils::EventDeduper;
use serde::{Deserialize, Serialize};
use sha2::Digest;
use std::cell::RefCell;
use std::collections::HashSet;
use std::fmt::Write;

thread_local! {
static STATE: RefCell<Option<State>> = RefCell::default();
Expand All @@ -24,7 +20,6 @@ pub struct State {
events: Events,
event_deduper: EventDeduper,
salt: Salt,
anonymization_config: AnonymizationConfig,
}

const STATE_ALREADY_INITIALIZED: &str = "State has already been initialized";
Expand Down Expand Up @@ -56,14 +51,12 @@ impl State {
pub fn new(
push_events_whitelist: HashSet<Principal>,
read_events_whitelist: HashSet<Principal>,
anonymization_config: AnonymizationInitConfig,
) -> State {
State {
push_events_whitelist,
read_events_whitelist,
events: Events::default(),
event_deduper: EventDeduper::default(),
anonymization_config: anonymization_config.into(),
salt: Salt::default(),
}
}
Expand Down Expand Up @@ -93,62 +86,9 @@ impl State {
self.salt.set(salt);
}

pub fn push_event(&mut self, mut event: IdempotentEvent, now: TimestampMillis) {
if !self.event_deduper.try_push(event.idempotency_key, now) {
return;
}

if self.anonymization_config.users {
if let Some(user) = event
.user
.as_mut()
.filter(|u| !self.anonymization_config.exclusions.contains(*u))
{
*user = self.anonymize(user);
}
}

if self.anonymization_config.sources {
if let Some(source) = event
.source
.as_mut()
.filter(|s| !self.anonymization_config.exclusions.contains(*s))
{
*source = self.anonymize(source);
}
}

self.events.push(event);
}

fn anonymize(&self, value: &str) -> String {
// Generates a 32 character string from the input value + the salt
let mut hasher = sha2::Sha256::new();
hasher.update(value.as_bytes());
hasher.update(self.salt.get());
let hash: [u8; 32] = hasher.finalize().into();

let mut string = String::with_capacity(32);
for byte in &hash[0..16] {
write!(string, "{byte:02x}").unwrap();
}
string
}
}

#[derive(Serialize, Deserialize, Default)]
struct AnonymizationConfig {
users: bool,
sources: bool,
exclusions: HashSet<String>,
}

impl From<AnonymizationInitConfig> for AnonymizationConfig {
fn from(value: AnonymizationInitConfig) -> Self {
AnonymizationConfig {
users: value.users.unwrap_or_default(),
sources: value.sources.unwrap_or_default(),
exclusions: value.exclusions.unwrap_or_default().into_iter().collect(),
pub fn push_event(&mut self, event: IdempotentEvent, now: TimestampMillis) {
if self.event_deduper.try_push(event.idempotency_key, now) {
self.events.push(event, self.salt.get());
}
}
}
14 changes: 3 additions & 11 deletions rs/integration_tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
use crate::rng::{random, random_bytes, random_principal, random_string};
use crate::setup::setup_new_env;
use candid::Principal;
use event_store_canister::{
AnonymizationInitConfig, EventsArgs, IdempotentEvent, InitArgs, PushEventsArgs,
};
use event_store_canister::{Anonymizable, EventsArgs, IdempotentEvent, InitArgs, PushEventsArgs};
use pocket_ic::PocketIc;
use std::fs::File;
use std::io::Read;
Expand Down Expand Up @@ -81,11 +79,6 @@ fn users_and_source_can_be_anonymized(users: bool, sources: bool) {
} = install_canister(Some(InitArgs {
push_events_whitelist: vec![random_principal()],
read_events_whitelist: vec![random_principal()],
anonymization_config: Some(AnonymizationInitConfig {
users: Some(users),
sources: Some(sources),
exclusions: None,
}),
}));

let user = random_string();
Expand All @@ -100,8 +93,8 @@ fn users_and_source_can_be_anonymized(users: bool, sources: bool) {
idempotency_key: random(),
name: random_string(),
timestamp: 1000,
user: Some(user.clone()),
source: Some(source.clone()),
user: Some(Anonymizable::new(user.clone(), users)),
source: Some(Anonymizable::new(source.clone(), sources)),
payload: Vec::new(),
}],
},
Expand Down Expand Up @@ -143,7 +136,6 @@ fn install_canister(init_args: Option<InitArgs>) -> TestEnv {
let init_args = init_args.unwrap_or_else(|| InitArgs {
push_events_whitelist: vec![random_principal()],
read_events_whitelist: vec![random_principal()],
anonymization_config: None,
});

let canister_id = env.create_canister_with_settings(Some(controller), None);
Expand Down
26 changes: 13 additions & 13 deletions rs/producer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use event_store_canister::{IdempotentEvent, TimestampMillis};
use event_store_canister::{Anonymizable, IdempotentEvent, TimestampMillis};
use ic_principal::Principal;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::sync::{Arc, Mutex, MutexGuard};
Expand Down Expand Up @@ -41,16 +41,16 @@ struct ClientInner<R> {
pub struct Event {
name: String,
timestamp: TimestampMillis,
user: Option<String>,
source: Option<String>,
user: Option<Anonymizable>,
source: Option<Anonymizable>,
payload: Vec<u8>,
}

pub struct EventBuilder {
name: String,
timestamp: TimestampMillis,
user: Option<String>,
source: Option<String>,
user: Option<Anonymizable>,
source: Option<Anonymizable>,
payload: Vec<u8>,
}

Expand All @@ -65,23 +65,23 @@ impl EventBuilder {
}
}

pub fn with_user(mut self, user: impl Into<String>) -> Self {
self.user = Some(user.into());
pub fn with_user(mut self, user: impl Into<String>, anonymize: bool) -> Self {
self.user = Some(Anonymizable::new(user.into(), anonymize));
self
}

pub fn with_maybe_user(mut self, user: Option<impl Into<String>>) -> Self {
self.user = user.map(|u| u.into());
pub fn with_maybe_user(mut self, user: Option<impl Into<String>>, anonymize: bool) -> Self {
self.user = user.map(|u| Anonymizable::new(u.into(), anonymize));
self
}

pub fn with_source(mut self, source: impl Into<String>) -> Self {
self.source = Some(source.into());
pub fn with_source(mut self, source: impl Into<String>, anonymize: bool) -> Self {
self.source = Some(Anonymizable::new(source.into(), anonymize));
self
}

pub fn with_maybe_source(mut self, source: Option<impl Into<String>>) -> Self {
self.source = source.map(|u| u.into());
pub fn with_maybe_source(mut self, source: Option<impl Into<String>>, anonymize: bool) -> Self {
self.source = source.map(|u| Anonymizable::new(u.into(), anonymize));
self
}

Expand Down

0 comments on commit 7f3b93f

Please sign in to comment.