From 7b8b1e79e74971520ae2b43340948f80805fa97c Mon Sep 17 00:00:00 2001 From: Andy Chosak Date: Thu, 9 Nov 2023 07:39:46 -0500 Subject: [PATCH] Add two new fields to redirect export (#88) This change adds two new boolean fields to the redirect data available as a CSV download or via the API: - is_http_to_https: true if the redirect is only from an http:// URL to the equivalent https:// URL. - is_append_slash: true if the redirect is only from a URL to the same URL with a trailing slash appended. To test, visit http://localhost:8000/redirects/?format=api to browse the API or http://localhost:8000/redirects/?format=csv to download redirect data as a CSV. --- crawler/models.py | 8 ++++++++ crawler/tests/test_models.py | 34 ++++++++++++++++++++++++++++++++-- viewer/serializers.py | 12 ++++++++++-- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/crawler/models.py b/crawler/models.py index b30f958..cff470f 100644 --- a/crawler/models.py +++ b/crawler/models.py @@ -169,3 +169,11 @@ class Redirect(ErrorBase): def __str__(self): return super().__str__() + f" -> {self.location}" + + @property + def is_http_to_https(self): + return self.location == re.sub(r"^http://", "https://", self.url) + + @property + def is_append_slash(self): + return not self.url.endswith("/") and self.location == self.url + "/" diff --git a/crawler/tests/test_models.py b/crawler/tests/test_models.py index c6d1890..1ce3179 100644 --- a/crawler/tests/test_models.py +++ b/crawler/tests/test_models.py @@ -91,12 +91,14 @@ def test_from_html_no_body(self): class ErrorTests(SimpleTestCase): - def test_error_str(self): + def test_str(self): self.assertEqual( str(Error(url="/not-found/", status_code=404)), "/not-found/ 404 !"
) - def test_error_str_with_referrer(self): + +class RedirectTests(SimpleTestCase): + def test_str(self): self.assertEqual( str( Redirect( @@ -108,3 +110,31 @@ def test_error_str_with_referrer(self): ), "/redirect/ (from /source/) 301 -> /destination/", ) + + def test_is_http_to_https(self): + self.assertTrue( + Redirect( + url="http://example.com/", location="https://example.com/" + ).is_http_to_https + ) + + self.assertFalse( + Redirect( + url="http://example.com/", location="https://example.com" + ).is_http_to_https + ) + + self.assertFalse( + Redirect(url="https://example.com/", location="/foo/").is_http_to_https + ) + + def test_is_append_slash(self): + self.assertTrue( + Redirect( + url="https://example.com", location="https://example.com/" + ).is_append_slash + ) + + self.assertFalse( + Redirect(url="https://example.com/", location="/foo/").is_append_slash + ) diff --git a/viewer/serializers.py b/viewer/serializers.py index a8cb382..0e31e3b 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -82,5 +82,13 @@ class RedirectSerializer(serializers.ModelSerializer): class Meta: model = Redirect - fields = ["timestamp", "url", "status_code", "referrer", "redirect_url"] - csv_header = ErrorSerializer.Meta.csv_header + ["redirect_url"] + fields = ErrorSerializer.Meta.fields + [ + "redirect_url", + "is_http_to_https", + "is_append_slash", + ] + csv_header = ErrorSerializer.Meta.csv_header + [ + "redirect_url", + "is_http_to_https", + "is_append_slash", + ]