diff --git a/Tests/Parser/fixtures/oss.yml b/Tests/Parser/fixtures/oss.yml index 6b8e3be025..c409bb3629 100644 --- a/Tests/Parser/fixtures/oss.yml +++ b/Tests/Parser/fixtures/oss.yml @@ -4102,3 +4102,19 @@ version: 9.0.0 platform: family: Android +- + user_agent: Aloha/1 CFNetwork/1492.0.1 Darwin/23.3.0 + os: + name: iOS + short_name: IOS + version: "17.3" + platform: + family: iOS +- + user_agent: Safari/19617.1.17.11.9 CFNetwork/1490.0.4 Darwin/23.2.0 + os: + name: Mac + short_name: MAC + version: "14.2" + platform: + family: Mac diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index cd78dec89f..e09062a782 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -5881,3 +5881,39 @@ producer: name: Meltwater Deutschland GmbH url: https://www.meltwater.com/ +- + user_agent: Owler@ows.eu/1 + bot: + name: OWLer + category: Crawler + url: https://openwebsearch.eu/owler/ + producer: + name: Open Search Foundation e.V. + url: https://openwebsearch.eu/ +- + user_agent: OWLer/0.1 (built with StormCrawler; https://ows.eu/owler; owl@ow-s.eu + bot: + name: OWLer + category: Crawler + url: https://openwebsearch.eu/owler/ + producer: + name: Open Search Foundation e.V. 
+ url: https://openwebsearch.eu/ +- + user_agent: Page Monitor (https://confluence.dev.bbc.co.uk/display/men/Page+Monitor) + bot: + name: BBC Page Monitor + category: Site Monitor + url: https://confluence.dev.bbc.co.uk/display/men/Page+Monitor + producer: + name: BBC + url: https://www.bbc.com/ +- + user_agent: BBC-Forge-URL-Monitor-Twisted + bot: + name: BBC Forge URL Monitor + category: Site Monitor + url: https://www.bbc.com/ + producer: + name: BBC + url: https://www.bbc.com/ diff --git a/regexes/bots.yml b/regexes/bots.yml index fbfd6dc22a..868bea9b96 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -3508,6 +3508,30 @@ name: 'Meltwater Deutschland GmbH' url: 'https://www.meltwater.com/' +- regex: '(?:Owler@ows\.eu|OWLer)/([\d.]+)' + name: 'OWLer' + category: 'Crawler' + url: 'https://openwebsearch.eu/owler/' + producer: + name: 'Open Search Foundation e.V.' + url: 'https://openwebsearch.eu/' + +- regex: 'bbc\.co\.uk/display/men/Page\+Monitor' + name: 'BBC Page Monitor' + category: 'Site Monitor' + url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor' + producer: + name: 'BBC' + url: 'https://www.bbc.com/' + +- regex: 'BBC-Forge-URL-Monitor-Twisted' + name: 'BBC Forge URL Monitor' + category: 'Site Monitor' + url: 'https://www.bbc.com/' + producer: + name: 'BBC' + url: 'https://www.bbc.com/' + # Generic detections - regex: '[a-z0-9\-_]*((?