From 6f51667ca7ca9fc6ab43f6be239dcce05beaf05f Mon Sep 17 00:00:00 2001 From: 22earth Date: Sat, 15 Oct 2022 16:15:57 +0800 Subject: [PATCH] feat: filter regex --- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 7 ++++--- src/rss_site/mod.rs | 24 +++++++++++++++++++++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 837f01b..4730882 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1154,6 +1154,7 @@ dependencies = [ "gcookie", "log", "once_cell", + "regex", "reqwest", "rss", "rusqlite", diff --git a/Cargo.toml b/Cargo.toml index 10e4b25..72a7684 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ gcookie = "0.0.3" log = "0.4.0" env_logger = "0.9.0" once_cell = "1.15.0" +regex = "1" [profile.release] lto = true diff --git a/README.md b/README.md index 7af7cce..8d350d6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ - [x] proxy 配置 - ~~目前写死在 build_proxy_client 里面~~ - 读取 ALL_PROXY 或者 HTTPS_PROXY 环境变量 -- [ ] 正则过滤 filter +- [x] 正则过滤 filter - [ ] Windows 定时任务 - ~~懒得写了,我是手动配置的~~ - [x] 不同网站的并发任务 @@ -57,7 +57,7 @@ rss2pan -u "https://mikanani.me/RSS/Bangumi?bangumiId=2739&subgroupid=12" "mikanani.me": [ { "name": "test", - "filter": "简体内嵌", + "filter": "/简体|1080p/", "url": "https://mikanani.me/RSS/Bangumi?bangumiId=2739&subgroupid=12" } ], @@ -73,7 +73,8 @@ rss2pan -u "https://mikanani.me/RSS/Bangumi?bangumiId=2739&subgroupid=12" ``` 配置了 `filter` 后,标题包含该文字的会被离线。不设置 `filter` 默认离线全部 -> 正则功能还没写 + +`/简体|\\d{3,4}[pP]/` 使用斜线包裹的正则规则。注意转义规则 cid 是离线到指定的文件夹的 id 。 获取方法: 浏览器打开 115 的文件,地址栏像 `https://115.com/?cid=2479224057885794455&offset=0&tab=&mode=wangpan` diff --git a/src/rss_site/mod.rs b/src/rss_site/mod.rs index 99557ca..60fc79a 100644 --- a/src/rss_site/mod.rs +++ b/src/rss_site/mod.rs @@ -2,6 +2,7 @@ mod dmhy; mod mikanani; mod nyaa; +use regex::Regex; use reqwest::Method; use rss::{Channel, Item}; use std::io::BufReader; @@ -75,7 +76,17 @@ pub async fn get_magnetitem_list(config: &RssConfig) -> Vec { let m =
site.get_magnet_item(item); let mut flag = true; if let Some(pat) = &config.filter { - flag = m.title.contains(pat); + if pat.len() >= 2 && pat.starts_with("/") && pat.ends_with("/") { /* length guard: a lone "/" would make the slice below panic */ + let re = Regex::new(&pat[1..pat.len() - 1]); + match re { + Ok(re) => { + flag = re.is_match(&m.title); + } + Err(_) => {} + } + } else { + flag = m.title.contains(pat); + } } if flag { item_list.push(m) @@ -116,4 +127,15 @@ mod tests { let res = service.save_items(&items, true); assert!(res.is_ok()); } + #[test] + fn test_re() { + let str_list = [ + "[7月新番][传颂之物 二人的白皇][Utawarerumono - Futari no Hakuoro][09][1080P][MP4][GB][简中] [241.72 MB]", + "【幻樱字幕组】【7月新番】【传颂之物 二人白皇 Utawarerumono-Futari no Hakuoro-】【16】【BIG5_MP4】【1920X1080】 [321.13 MB]", + "[动漫国字幕组&澄空学园&LoliHouse] 传颂之物 二人的白皇 / Utawarerumono Futari no Hakuoro - 16 [WebRip 1080p HEVC-10bit AAC][简繁外挂字幕] [485.4 MB]" + ]; + let pat = "/澄空学园|幻樱|\\d{4}[pP]/"; /* [pP]: the first title spells the resolution "1080P" */ + let re = Regex::new(&pat[1..pat.len() - 1]).unwrap(); + assert_eq!(str_list.map(|s| re.is_match(s)), [true, true, true]); + } }