Skip to content

Commit

Permalink
Merge pull request #111 from bershadskiy/sanitize-YouTube-links
Browse files Browse the repository at this point in the history
  • Loading branch information
Lurk authored May 14, 2024
2 parents df28a63 + 21c634b commit 9f8eeca
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 9 deletions.
2 changes: 2 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ params = [
'utm_medium', # Identifies what type of link was used
'utm_term', # Identifies search terms
'utm_content', # Identifies what specifically was clicked to bring the user to the site
"youtube.com``si", # YouTube-specific source identifier, declared using the "{domain}``{param}" pattern
"youtu.be``si", # YouTube-specific source identifier, declared using the "{domain}``{param}" pattern
]
# Which exit params in URL should be unwrapped
exit = [
Expand Down
110 changes: 101 additions & 9 deletions src/clink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ impl Clink {
let mut res = str.to_string();
for link in self.finder.links(str) {
let mut l = Url::parse(self.unwrap_exit_params(link.as_str()).as_str()).unwrap();
let query: Vec<(_, _)> = self.process_query(l.query_pairs());
let query: Vec<(_, _)> = self.process_query(
l.query_pairs(),
l.domain()
.unwrap_or_default()
.strip_prefix("www.")
.or(l.domain()),
);
l.set_query(None);
for pair in query {
l.query_pairs_mut()
Expand All @@ -46,16 +52,20 @@ impl Clink {
res
}

fn process_query(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
fn process_query(
&self,
query: url::form_urlencoded::Parse<'_>,
domain: Option<&str>,
) -> Vec<(String, String)> {
match self.config.mode {
Mode::Remove => self.filter(query),
Mode::Replace => self.replace(query),
Mode::Remove => self.filter(query, domain),
Mode::Replace => self.replace(query, domain),
Mode::YourMom => {
let date = Utc::now();
if date.month() == 5 && date.day() == 9 {
self.filter(query)
self.filter(query, domain)
} else {
let mut tmp = self.filter(query);
let mut tmp = self.filter(query, domain);
tmp.push(("utm_source".to_string(), "your_mom".to_string()));
tmp
}
Expand All @@ -82,18 +92,42 @@ impl Clink {
}
}

fn filter(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
fn filter(
&self,
query: url::form_urlencoded::Parse<'_>,
domain: Option<&str>,
) -> Vec<(String, String)> {
query
.filter(|p| !self.config.params.contains::<Rc<str>>(&p.0.clone().into()))
.filter(|p| {
let global_absent = !self.config.params.contains::<Rc<str>>(&p.0.clone().into());
global_absent
&& if let Some(domain_val) = domain {
let param_name = format!("{}``{}", domain_val, p.0);
return !self.config.params.contains::<Rc<str>>(&param_name.into());
} else {
true
}
})
.map(|p| (p.0.to_string(), p.1.to_string()))
.collect()
}

fn replace(&self, query: url::form_urlencoded::Parse<'_>) -> Vec<(String, String)> {
fn replace(
&self,
query: url::form_urlencoded::Parse<'_>,
domain: Option<&str>,
) -> Vec<(String, String)> {
query
.map(|p| {
if self.config.params.contains::<Rc<str>>(&p.0.clone().into()) {
(p.0.to_string(), self.config.replace_to.clone())
} else if let Some(domain_val) = domain {
let param_name = format!("{}``{}", domain_val, p.0);
if self.config.params.contains::<Rc<str>>(&param_name.into()) {
(p.0.to_string(), self.config.replace_to.clone())
} else {
(p.0.to_string(), p.1.to_string())
}
} else {
(p.0.to_string(), p.1.to_string())
}
Expand Down Expand Up @@ -267,6 +301,64 @@ mod find_and_replace {
"https://test.test/?foo=clink"
);
}

#[test]
fn youtube_sanitize() {
    // Checks the domain-scoped `si` parameter: it is handled on youtu.be
    // (with or without a leading "www.") and left untouched on other hosts.
    // Each table row is (input link, expected sanitized link).

    // Default configuration: matching parameters are dropped from the link.
    let default_clink = Clink::new(ClinkConfig::default());
    let default_cases = [
        (
            "https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",
            "https://youtu.be/dQw4w9WgXcQ",
        ),
        (
            "https://www.youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH",
            "https://www.youtu.be/dQw4w9WgXcQ",
        ),
        (
            "https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",
            "https://youtu.be/dQw4w9WgXcQ?t=69",
        ),
        (
            "https://youtu.be/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69&fbclid=clid",
            "https://youtu.be/dQw4w9WgXcQ?t=69",
        ),
        // `si` on an unrelated domain must survive.
        (
            "https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",
            "https://test.test/dQw4w9WgXcQ?si=NblIBgit-qHN7MoH&t=69",
        ),
    ];
    for (input, expected) in default_cases {
        assert_eq!(default_clink.find_and_replace(input), expected);
    }

    // Replace mode: matching parameters keep their key but get the
    // replacement value; `si` is only rewritten on the YouTube host.
    let replace_clink = Clink::new(ClinkConfig::new(Mode::Replace));
    let replace_cases = [
        (
            "https://test.test/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd",
            "https://test.test/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=qweasd",
        ),
        (
            "https://youtu.be/?fbclid=dsadsa&utm_source=fafa&utm_campaign=fafas&utm_medium=adsa&si=qweasd",
            "https://youtu.be/?fbclid=clink&utm_source=clink&utm_campaign=clink&utm_medium=clink&si=clink",
        ),
    ];
    for (input, expected) in replace_cases {
        assert_eq!(replace_clink.find_and_replace(input), expected);
    }

    // YourMom mode: filtered like the default, then `utm_source=your_mom`
    // is appended.
    // NOTE(review): `process_query` skips the append when the current UTC
    // date is May 9, so these expectations look date-dependent — confirm.
    let your_mom_clink = Clink::new(ClinkConfig::new(Mode::YourMom));
    let your_mom_cases = [
        (
            "https://test.test/?si=dsadsa",
            "https://test.test/?si=dsadsa&utm_source=your_mom",
        ),
        (
            "https://youtu.be/?si=dsadsa",
            "https://youtu.be/?utm_source=your_mom",
        ),
    ];
    for (input, expected) in your_mom_cases {
        assert_eq!(your_mom_clink.find_and_replace(input), expected);
    }
}
}

#[cfg(test)]
Expand Down
5 changes: 5 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use serde::{Deserialize, Serialize};

use crate::mode::Mode;

/// Add a query parameter here if it must be removed or replaced on every domain.
/// To restrict a parameter to a specific domain, use the format
/// "{domain}``{param}".
fn get_default_params() -> HashSet<Rc<str>> {
HashSet::from([
"fbclid".into(),
Expand All @@ -21,6 +24,8 @@ fn get_default_params() -> HashSet<Rc<str>> {
"utm_medium".into(),
"utm_term".into(),
"utm_content".into(),
"youtube.com``si".into(),
"youtu.be``si".into(),
])
}

Expand Down

0 comments on commit 9f8eeca

Please sign in to comment.