From bde69cea9e053349bef6dfe08871a5001c5c633e Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Mon, 3 May 2021 17:20:05 +0200
Subject: [PATCH] Update user-agent parser with HTTP request libraries and download tools

---
Review note: the ureq, http.rb and GuzzleHttp patterns escape the dot
between the minor and patch version groups (`\.` rather than a bare `.`,
which would accept any character as the separator), consistent with the
minio-go and Transmit patterns added alongside them. Hunk line counts are
unchanged; the post-image blob id on the regexes.yaml index line predates
this adjustment.

 regexes.yaml                          | 24 ++++++++++++-
 test_resources/pgts_browser_list.yaml |  2 +-
 tests/test_ua.yaml                    | 49 +++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/regexes.yaml b/regexes.yaml
index e7235e99..d843ed35 100644
--- a/regexes.yaml
+++ b/regexes.yaml
@@ -955,9 +955,25 @@ user_agent_parsers:
 
   - regex: '(Java)[/ ]?\d+\.(\d+)\.(\d+)[_-]*([a-zA-Z0-9]+|)'
 
+  # minio-go (https://github.com/minio/minio-go)
+  - regex: '(minio-go)/v(\d+)\.(\d+)\.(\d+)'
+
+  # ureq - minimal request library in rust (https://github.com/algesten/ureq)
+  - regex: '^(ureq)[/ ](\d+)\.(\d+)\.(\d+)'
+
+  # http.rb - HTTP (The Gem! a.k.a. http.rb) - a fast Ruby HTTP client
+  # (https://github.com/httprb/http/blob/3aa7470288deb81f7d7b982c1e2381871049dcbb/lib/http/request.rb#L27)
+  - regex: '^(http\.rb)/(\d+)\.(\d+)\.(\d+)'
+
+  # Guzzle, PHP HTTP client (https://docs.guzzlephp.org/)
+  - regex: '^(GuzzleHttp)/(\d+)\.(\d+)\.(\d+)'
+
+  # lorien/grab - Web Scraping Framework (https://github.com/lorien/grab)
+  - regex: '^(grab)\b'
+
   # Cloud Storage Clients
   - regex: '^(Cyberduck)/(\d+)\.(\d+)\.(\d+)(?:\.\d+|)'
-  - regex: '^(S3 Browser) (\d+)-(\d+)-(\d+)(?:\s*http://s3browser\.com|)'
+  - regex: '^(S3 Browser) (\d+)[.-](\d+)[.-](\d+)(?:\s*https?://s3browser\.com|)'
   - regex: '(S3Gof3r)'
   # IBM COS (Cloud Object Storage) API
   - regex: '\b(ibm-cos-sdk-(?:core|java|js|python))/(\d+)\.(\d+)(?:\.(\d+)|)'
@@ -980,6 +996,12 @@ user_agent_parsers:
   - regex: '^(ViaFree|Viafree)-(?:tvOS-)?[A-Z]{2}/(\d+)\.(\d+)\.(\d+)'
     family_replacement: 'ViaFree'
 
+  # Transmit (https://library.panic.com/transmit/)
+  - regex: '(Transmit)/(\d+)\.(\d+)\.(\d+)'
+
+  # Download Master (https://downloadmaster.ru/)
+  - regex: '(Download Master)'
+
 os_parsers:
   ##########
   # HbbTV vendors
diff --git a/test_resources/pgts_browser_list.yaml b/test_resources/pgts_browser_list.yaml
index 3382d49c..35f6be62 100755
--- a/test_resources/pgts_browser_list.yaml
+++ b/test_resources/pgts_browser_list.yaml
@@ -1291,7 +1291,7 @@ test_cases:
     patch:
 
   - user_agent_string: 'Download Master'
-    family: 'Other'
+    family: 'Download Master'
     major:
     minor:
     patch:
diff --git a/tests/test_ua.yaml b/tests/test_ua.yaml
index d0a0be92..d5c784cb 100644
--- a/tests/test_ua.yaml
+++ b/tests/test_ua.yaml
@@ -7946,6 +7946,12 @@ test_cases:
     minor: '4'
     patch: '5'
 
+  - user_agent_string: 'S3 Browser 8.6.7 https://s3browser.com'
+    family: 'S3 Browser'
+    major: '8'
+    minor: '6'
+    patch: '7'
+
   - user_agent_string: 'rclone/v1.34'
     family: 'rclone'
     major: '1'
@@ -8568,3 +8574,46 @@ test_cases:
     major: '1'
     minor: '0'
     patch:
+
+  - user_agent_string: 'MinIO (linux; amd64) minio-go/v6.0.39 mc/2019-10-09T22:54:57Z'
+    family: 'minio-go'
+    major: '6'
+    minor: '0'
+    patch: '39'
+    patch_minor: ''
+
+  - user_agent_string: 'MinIO (darwin; amd64) minio-go/v6.0.45 mc/2019-12-17T23:26:28Z'
+    family: 'minio-go'
+    major: '6'
+    minor: '0'
+    patch: '45'
+    patch_minor: ''
+
+  - user_agent_string: 'http.rb/4.1.1'
+    family: 'http.rb'
+    major: '4'
+    minor: '1'
+    patch: '1'
+    patch_minor: ''
+
+  - user_agent_string: 'ureq/1.5.1'
+    family: 'ureq'
+    major: '1'
+    minor: '5'
+    patch: '1'
+    patch_minor: ''
+
+  - user_agent_string: 'Transmit/5.6.0'
+    family: 'Transmit'
+    major: '5'
+    minor: '6'
+    patch: '0'
+    patch_minor: ''
+
+  - user_agent_string: 'GuzzleHttp/6.3.3 PHP/7.1.17-1+0~20180505045738.17+stretch~1.gbpde69c6'
+    family: 'GuzzleHttp'
+    major: '6'
+    minor: '3'
+    patch: '3'
+    patch_minor: ''
+