diff --git a/readme.md b/readme.md
index 7923962..134c71c 100644
--- a/readme.md
+++ b/readme.md
@@ -53,6 +53,8 @@ params = [
     'utm_medium', # Identifies what type of link was used
     'utm_term', # Identifies search terms
     'utm_content', # Identifies what specifically was clicked to bring the user to the site
+    "youtube.com``si", # YouTube specific Source identifier using "{domain}``{param}" pattern
+    "youtu.be``si", # YouTube specific Source identifier using "{domain}``{param}" pattern
 ]
 # Which exit params in URL should be unwrapped
 exit = [
diff --git a/src/clink.rs b/src/clink.rs
index c02f5d3..228bb09 100644
--- a/src/clink.rs
+++ b/src/clink.rs
@@ -35,7 +35,13 @@ impl Clink {
         let mut res = str.to_string();
         for link in self.finder.links(str) {
             let mut l = Url::parse(self.unwrap_exit_params(link.as_str()).as_str()).unwrap();
-            let query: Vec<(_, _)> = self.process_query(l.query_pairs());
+            let query: Vec<(_, _)> = self.process_query(
+                l.query_pairs(),
+                l.domain()
+                    .unwrap_or_default()
+                    .strip_prefix("www.")
+                    .or(l.domain()),
+            );
             l.set_query(None);
             for pair in query {
                 l.query_pairs_mut()
@@ -46,16 +52,20 @@ impl Clink {
         res
     }
 
-    fn process_query(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
+    fn process_query(
+        &self,
+        query: url::form_urlencoded::Parse<'_>,
+        domain: Option<&str>,
+    ) -> Vec<(String, String)> {
         match self.config.mode {
-            Mode::Remove => self.filter(query),
-            Mode::Replace => self.replace(query),
+            Mode::Remove => self.filter(query, domain),
+            Mode::Replace => self.replace(query, domain),
             Mode::YourMom => {
                 let date = Utc::now();
                 if date.month() == 5 && date.day() == 9 {
-                    self.filter(query)
+                    self.filter(query, domain)
                 } else {
-                    let mut tmp = self.filter(query);
+                    let mut tmp = self.filter(query, domain);
                     tmp.push(("utm_source".to_string(), "your_mom".to_string()));
                     tmp
                 }
@@ -82,18 +92,42 @@ impl Clink {
         }
     }
 
-    fn filter(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
+    fn filter(
+        &self,
+        query: url::form_urlencoded::Parse<'_>,
+        domain: Option<&str>,
+    ) -> Vec<(String, String)> {
         query
-            .filter(|p| !self.config.params.contains::<Rc<str>>(&p.0.clone().into()))
+            .filter(|p| {
+                let global_absent = !self.config.params.contains::<Rc<str>>(&p.0.clone().into());
+                global_absent
+                    && if let Some(domain_val) = domain {
+                        let param_name = format!("{}``{}", domain_val, p.0);
+                        !self.config.params.contains::<Rc<str>>(&param_name.into())
+                    } else {
+                        true
+                    }
+            })
             .map(|p| (p.0.to_string(), p.1.to_string()))
             .collect()
     }
 
-    fn replace(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
+    fn replace(
+        &self,
+        query: url::form_urlencoded::Parse<'_>,
+        domain: Option<&str>,
+    ) -> Vec<(String, String)> {
         query
             .map(|p| {
                 if self.config.params.contains::<Rc<str>>(&p.0.clone().into()) {
                     (p.0.to_string(), self.config.replace_to.clone())
+                } else if let Some(domain_val) = domain {
+                    let param_name = format!("{}``{}", domain_val, p.0);
+                    if self.config.params.contains::<Rc<str>>(&param_name.into()) {
+                        (p.0.to_string(), self.config.replace_to.clone())
+                    } else {
+                        (p.0.to_string(), p.1.to_string())
+                    }
                 } else {
                     (p.0.to_string(), p.1.to_string())
                 }
@@ -267,6 +301,64 @@ mod find_and_replace {
             "https://test.test/?foo=clink"
         );
     }
+
+    #[test]
+    fn youtube_sanitize() {
+        let clink = Clink::new(ClinkConfig::default());
+
+        assert_eq!(
+            clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",),
+            "https://youtu.be/dQw4w9WgXcQ"
+        );
+
+        assert_eq!(
+            clink.find_and_replace("https://www.youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",),
+            "https://www.youtu.be/dQw4w9WgXcQ"
+        );
+
+        assert_eq!(
+            clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",),
+            "https://youtu.be/dQw4w9WgXcQ?t=69"
+        );
+
+        assert_eq!(
+            clink.find_and_replace(
+                "https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69&fbclid=clid",
+            ),
+            "https://youtu.be/dQw4w9WgXcQ?t=69"
+        );
+
+        assert_eq!(
+            clink.find_and_replace("https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",),
+            "https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69"
+        );
+
+        let clink = Clink::new(ClinkConfig::new(Mode::Replace));
+        assert_eq!(
+            clink.find_and_replace(
+                "https://test.test/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd",
+            ),
+            "https://test.test/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=qweasd"
+        );
+
+        assert_eq!(
+            clink.find_and_replace(
+                "https://youtu.be/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd",
+            ),
+            "https://youtu.be/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=clink"
+        );
+
+        let clink = Clink::new(ClinkConfig::new(Mode::YourMom));
+        assert_eq!(
+            clink.find_and_replace("https://test.test/?si=dsadsa",),
+            "https://test.test/?si=dsadsa&utm_source=your_mom"
+        );
+
+        assert_eq!(
+            clink.find_and_replace("https://youtu.be/?si=dsadsa",),
+            "https://youtu.be/?utm_source=your_mom"
+        );
+    }
 }
 
 #[cfg(test)]
diff --git a/src/config.rs b/src/config.rs
index b3a17b2..3ebeda7 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -9,6 +9,9 @@ use serde::{Deserialize, Serialize};
 
 use crate::mode::Mode;
 
+/// Add query params here that must be removed or replaced on any domain.
+/// To restrict a param to one domain, use the format
+/// "{domain}``{param}".
 fn get_default_params() -> HashSet<Rc<str>> {
     HashSet::from([
         "fbclid".into(),
@@ -21,6 +24,8 @@ fn get_default_params() -> HashSet<Rc<str>> {
         "utm_medium".into(),
         "utm_term".into(),
         "utm_content".into(),
+        "youtube.com``si".into(),
+        "youtu.be``si".into(),
     ])
 }
 