From b2e88d8972828434a5bc9c3c3f19c497796cb3d8 Mon Sep 17 00:00:00 2001 From: Dima Bershadskiy Date: Sun, 12 May 2024 17:52:42 +0300 Subject: [PATCH 1/3] #101 config is extended with a custom pattern for domain-specific fields; the pattern looks like "{}``{}",domain,param --- src/clink.rs | 52 +++++++++++++++++++++++++++++++++++++++++++--------- src/config.rs | 2 ++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/clink.rs b/src/clink.rs index c02f5d3..ac33eef 100644 --- a/src/clink.rs +++ b/src/clink.rs @@ -35,7 +35,7 @@ impl Clink { let mut res = str.to_string(); for link in self.finder.links(str) { let mut l = Url::parse(self.unwrap_exit_params(link.as_str()).as_str()).unwrap(); - let query: Vec<(_, _)> = self.process_query(l.query_pairs()); + let query: Vec<(_, _)> = self.process_query(l.query_pairs(),l.domain()); l.set_query(None); for pair in query { l.query_pairs_mut() @@ -46,16 +46,16 @@ impl Clink { res } - fn process_query(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> { + fn process_query(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { match self.config.mode { - Mode::Remove => self.filter(query), - Mode::Replace => self.replace(query), + Mode::Remove => self.filter(query, domain), + Mode::Replace => self.replace(query, domain), Mode::YourMom => { let date = Utc::now(); if date.month() == 5 && date.day() == 9 { - self.filter(query) + self.filter(query, domain) } else { - let mut tmp = self.filter(query); + let mut tmp = self.filter(query, domain); tmp.push(("utm_source".to_string(), "your_mom".to_string())); tmp } @@ -82,18 +82,32 @@ impl Clink { } } - fn filter(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> { + fn filter(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { query - .filter(|p| !self.config.params.contains::>(&p.0.clone().into())) + .filter(|p| { + let global_absent = 
!self.config.params.contains::>(&p.0.clone().into()); + return global_absent && + if let Some(domain_val) = domain { + let param_name = format!("{}``{}", domain_val, p.0); + return !self.config.params.contains::>(¶m_name.into()); + } else {true} + }) .map(|p| (p.0.to_string(), p.1.to_string())) .collect() } - fn replace(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> { + fn replace(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { query .map(|p| { if self.config.params.contains::>(&p.0.clone().into()) { (p.0.to_string(), self.config.replace_to.clone()) + } else if let Some(domain_val) = domain { + let param_name = format!("{}``{}", domain_val, p.0); + if self.config.params.contains::>(¶m_name.into()) { + (p.0.to_string(), self.config.replace_to.clone()) + } else { + (p.0.to_string(), p.1.to_string()) + } } else { (p.0.to_string(), p.1.to_string()) } @@ -267,6 +281,26 @@ mod find_and_replace { "https://test.test/?foo=clink" ); } + + #[test] + fn youtube_sanitize() { + let clink = Clink::new(ClinkConfig::default()); + + assert_eq!( + clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",), + "https://youtu.be/dQw4w9WgXcQ" + ); + + assert_eq!( + clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",), + "https://youtu.be/dQw4w9WgXcQ?t=69" + ); + + assert_eq!( + clink.find_and_replace("https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",), + "https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69" + ); + } } #[cfg(test)] diff --git a/src/config.rs b/src/config.rs index b3a17b2..125fa24 100644 --- a/src/config.rs +++ b/src/config.rs @@ -21,6 +21,8 @@ fn get_default_params() -> HashSet> { "utm_medium".into(), "utm_term".into(), "utm_content".into(), + "youtube.com``si".into(), + "youtu.be``si".into(), ]) } From 2c6de0c4de54d41cdd1114293e9ecefebdd87cdf Mon Sep 17 00:00:00 2001 From: Dima Bershadskiy Date: Sun, 12 May 2024 18:06:48 +0300 Subject: [PATCH 
2/3] some more tests. Evil mode not supported yet. --- src/clink.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/clink.rs b/src/clink.rs index ac33eef..45ccf9e 100644 --- a/src/clink.rs +++ b/src/clink.rs @@ -296,10 +296,45 @@ mod find_and_replace { "https://youtu.be/dQw4w9WgXcQ?t=69" ); + assert_eq!( + clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69&fbclid=clid",), + "https://youtu.be/dQw4w9WgXcQ?t=69" + ); + assert_eq!( clink.find_and_replace("https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",), "https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69" ); + + let clink = Clink::new(ClinkConfig::new(Mode::Replace)); + assert_eq!( + clink.find_and_replace( + "https://test.test/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd", + ), + "https://test.test/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=qweasd" + ); + + assert_eq!( + clink.find_and_replace( + "https://youtu.be/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd", + ), + "https://youtu.be/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=clink" + ); + + let clink = Clink::new(ClinkConfig::new(Mode::YourMom)); + assert_eq!( + clink.find_and_replace( + "https://test.test/?si=dsadsa", + ), + "https://test.test/?si=dsadsa&utm_source=your_mom" + ); + + assert_eq!( + clink.find_and_replace( + "https://youtu.be/?si=dsadsa", + ), + "https://youtu.be/?utm_source=your_mom" + ); } } From 21c634b7dc087402f7c394673a1bb8d361890e58 Mon Sep 17 00:00:00 2001 From: Dima Bershadskiy Date: Tue, 14 May 2024 16:53:26 +0300 Subject: [PATCH 3/3] PR fix www. 
prefix solved some docs added --- readme.md | 2 ++ src/clink.rs | 51 +++++++++++++++++++++++++++++++++++++-------------- src/config.rs | 3 +++ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/readme.md b/readme.md index 7923962..134c71c 100644 --- a/readme.md +++ b/readme.md @@ -53,6 +53,8 @@ params = [ 'utm_medium', # Identifies what type of link was used 'utm_term', # Identifies search terms 'utm_content', # Identifies what specifically was clicked to bring the user to the site + "youtube.com``si", # YouTube specific Source identifier using "{domain}``{param}" pattern + "youtu.be``si", # YouTube specific Source identifier using "{domain}``{param}" pattern ] # Which exit params in URL should be unwrapped exit = [ diff --git a/src/clink.rs b/src/clink.rs index 45ccf9e..228bb09 100644 --- a/src/clink.rs +++ b/src/clink.rs @@ -35,7 +35,13 @@ impl Clink { let mut res = str.to_string(); for link in self.finder.links(str) { let mut l = Url::parse(self.unwrap_exit_params(link.as_str()).as_str()).unwrap(); - let query: Vec<(_, _)> = self.process_query(l.query_pairs(),l.domain()); + let query: Vec<(_, _)> = self.process_query( + l.query_pairs(), + l.domain() + .unwrap_or_default() + .strip_prefix("www.") + .or(l.domain()), + ); l.set_query(None); for pair in query { l.query_pairs_mut() @@ -46,7 +52,11 @@ impl Clink { res } - fn process_query(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { + fn process_query( + &self, + query: url::form_urlencoded::Parse<'_>, + domain: Option<&str>, + ) -> Vec<(String, String)> { match self.config.mode { Mode::Remove => self.filter(query, domain), Mode::Replace => self.replace(query, domain), @@ -82,21 +92,31 @@ impl Clink { } } - fn filter(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { + fn filter( + &self, + query: url::form_urlencoded::Parse<'_>, + domain: Option<&str>, + ) -> Vec<(String, String)> { query .filter(|p| { let 
global_absent = !self.config.params.contains::>(&p.0.clone().into()); - return global_absent && - if let Some(domain_val) = domain { + global_absent + && if let Some(domain_val) = domain { let param_name = format!("{}``{}", domain_val, p.0); return !self.config.params.contains::>(¶m_name.into()); - } else {true} + } else { + true + } }) .map(|p| (p.0.to_string(), p.1.to_string())) .collect() } - fn replace(&self, query: url::form_urlencoded::Parse<'_>, domain: Option<&str>) -> Vec<(String, String)> { + fn replace( + &self, + query: url::form_urlencoded::Parse<'_>, + domain: Option<&str>, + ) -> Vec<(String, String)> { query .map(|p| { if self.config.params.contains::>(&p.0.clone().into()) { @@ -291,13 +311,20 @@ mod find_and_replace { "https://youtu.be/dQw4w9WgXcQ" ); + assert_eq!( + clink.find_and_replace("https://www.youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",), + "https://www.youtu.be/dQw4w9WgXcQ" + ); + assert_eq!( clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",), "https://youtu.be/dQw4w9WgXcQ?t=69" ); assert_eq!( - clink.find_and_replace("https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69&fbclid=clid",), + clink.find_and_replace( + "https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69&fbclid=clid", + ), "https://youtu.be/dQw4w9WgXcQ?t=69" ); @@ -323,16 +350,12 @@ mod find_and_replace { let clink = Clink::new(ClinkConfig::new(Mode::YourMom)); assert_eq!( - clink.find_and_replace( - "https://test.test/?si=dsadsa", - ), + clink.find_and_replace("https://test.test/?si=dsadsa",), "https://test.test/?si=dsadsa&utm_source=your_mom" ); assert_eq!( - clink.find_and_replace( - "https://youtu.be/?si=dsadsa", - ), + clink.find_and_replace("https://youtu.be/?si=dsadsa",), "https://youtu.be/?utm_source=your_mom" ); } diff --git a/src/config.rs b/src/config.rs index 125fa24..3ebeda7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -9,6 +9,9 @@ use serde::{Deserialize, Serialize}; use crate::mode::Mode; +/// add query param, that must be 
replaced within any domain +/// to specify domain-specific params, use the format +/// "{domain}``{param}" fn get_default_params() -> HashSet> { HashSet::from([ "fbclid".into(),