Skip to content

Commit

Permalink
feat: add RecipeRadar crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison authored and lbarthon committed Sep 17, 2024
1 parent 1458dfa commit cee7823
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
6 changes: 5 additions & 1 deletion regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ user_agent_parsers:
- regex: '\[FB.{0,300};'
family_replacement: 'Facebook'

# RecipeRadar crawler
# A single pattern covers both 'RecipeRadar/1.2.3' and 'RecipeRadar/1.2':
# group 1 is the family, groups 2-3 are major/minor, and the optional
# group 4 is the patch (left unset when the UA has only two components).
- regex: '(RecipeRadar)/(\d+)\.(\d+)(?:\.(\d+)|)'

# Bots General matcher 'name/0.0'
# Group 1 captures a name ending in archiver/indexer/scraper/bot/spider/crawl*
# (first letter in either case). The name must be followed by '/' or a space
# and a numeric major version (group 2); minor (group 3) and patch (group 4)
# are optional dotted components.
- regex: '^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
# Bots containing bot(but not CUBOT)
Expand Down Expand Up @@ -5905,7 +5909,7 @@ device_parsers:
##########
# Spiders (this is a hack...)
##########
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper)'
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar)'
regex_flag: 'i'
device_replacement: 'Spider'
brand_replacement: 'Spider'
Expand Down
6 changes: 4 additions & 2 deletions tests/test_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80619,5 +80619,7 @@ test_cases:
brand: 'Motorola'
model: 'motorola moto g play (2021)'



# RecipeRadar crawler: expected to be classified as a generic spider device.
- user_agent_string: 'Mozilla/5.0 (compatible; Linux x86_64; python-requests/2.32.3; RecipeRadar/0.1; +https://www.reciperadar.com)'
family: 'Spider'
brand: 'Spider'
model: 'Desktop'
6 changes: 6 additions & 0 deletions tests/test_ua.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8889,3 +8889,9 @@ test_cases:
minor: '3'
patch: '0'
patch_minor: '0'

# RecipeRadar crawler with a two-component version ('0.1'): there is no third
# component, so patch is expected to be empty.
- user_agent_string: 'Mozilla/5.0 (compatible; Linux x86_64; python-requests/2.32.3; RecipeRadar/0.1; +https://www.reciperadar.com)'
family: 'RecipeRadar'
major: '0'
minor: '1'
patch:

0 comments on commit cee7823

Please sign in to comment.