From 3b888801e07ad85948609e8f3acb106d036bd454 Mon Sep 17 00:00:00 2001 From: Andy Chosak Date: Wed, 3 Jul 2024 13:28:52 -0400 Subject: [PATCH] Don't crawl URLs on x.com (#93) The crawler logic currently skips links to twitter.com; as of https://github.com/cfpb/consumerfinance.gov/commit/846e03a1728bd568e3eefa4ef7ed1e3ccbcaeee1 the CFPB website now links to x.com instead. This change adds x.com to the crawl exclusion list. --- crawler/wpull_plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/crawler/wpull_plugin.py b/crawler/wpull_plugin.py index 0e27cea..26d194c 100644 --- a/crawler/wpull_plugin.py +++ b/crawler/wpull_plugin.py @@ -27,6 +27,7 @@ [ r"^https://www.facebook.com/dialog/share\?.*", r"^https://twitter.com/intent/tweet\?.*", + r"^https://x.com/intent/tweet\?.*", r"^https://www.linkedin.com/shareArticle\?.*", ], )