Skip to content

Commit

Permalink
Add two new fields to redirect export (#88)
Browse files Browse the repository at this point in the history
This change adds two new boolean fields to the redirect data available
as a CSV download or via the API:

- is_http_to_https: true if the redirect is only from an http:// URL to
the equivalent https:// URL.
- is_append_slash: true if the redirect is only from a URL to the same
URL with a trailing slash appended.

To test, visit http://localhost:8000/redirects/?format=api to browse
the API or http://localhost:8000/redirects/?format=csv to download
redirect data as a CSV.
  • Loading branch information
chosak authored Nov 9, 2023
1 parent 029a584 commit 7b8b1e7
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
8 changes: 8 additions & 0 deletions crawler/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,11 @@ class Redirect(ErrorBase):

def __str__(self):
    """Return the parent class's string form followed by the redirect target."""
    base_text = super().__str__()
    return base_text + f" -> {self.location}"

@property
def is_http_to_https(self):
    """Whether this redirect only upgrades an http:// URL to https://.

    True when ``url`` starts with ``http://`` and ``location`` is that same
    URL with the scheme replaced by ``https://``; False otherwise.
    """
    insecure, secure = "http://", "https://"

    # Require the http:// prefix explicitly: without this check, a URL that
    # does not start with http:// and redirects to itself would be reported
    # as a scheme upgrade.
    if not self.url.startswith(insecure):
        return False

    return self.location == secure + self.url[len(insecure):]

@property
def is_append_slash(self):
    """Whether this redirect only appends a trailing slash to the URL.

    False when ``url`` already ends with ``/``; otherwise True exactly when
    ``location`` equals ``url`` followed by ``/``.
    """
    if self.url.endswith("/"):
        return False

    with_slash = self.url + "/"
    return self.location == with_slash
34 changes: 32 additions & 2 deletions crawler/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@ def test_from_html_no_body(self):


class ErrorTests(SimpleTestCase):
    """Tests for the string representation of ``Error``."""

    def test_str(self):
        """str() of an Error built from a URL and status code matches the expected text."""
        self.assertEqual(
            str(Error(url="/not-found/", status_code=404)), "/not-found/ 404 !"
        )

def test_error_str_with_referrer(self):

class RedirectTests(SimpleTestCase):
def test_str(self):
self.assertEqual(
str(
Redirect(
Expand All @@ -108,3 +110,31 @@ def test_error_str_with_referrer(self):
),
"/redirect/ (from /source/) 301 -> /destination/",
)

def test_is_http_to_https(self):
    """is_http_to_https is true only for a pure http:// -> https:// change."""
    # Same URL, scheme upgraded.
    upgraded = Redirect(url="http://example.com/", location="https://example.com/")
    self.assertTrue(upgraded.is_http_to_https)

    # Scheme upgraded, but the trailing slash is gone as well.
    slash_dropped = Redirect(url="http://example.com/", location="https://example.com")
    self.assertFalse(slash_dropped.is_http_to_https)

    # Redirect to a different location altogether.
    elsewhere = Redirect(url="https://example.com/", location="/foo/")
    self.assertFalse(elsewhere.is_http_to_https)

def test_is_append_slash(self):
    """is_append_slash is true only when the target is the URL plus a slash."""
    # Target is the source URL with "/" appended.
    slash_added = Redirect(url="https://example.com", location="https://example.com/")
    self.assertTrue(slash_added.is_append_slash)

    # Source already ends with "/" and the target is a different location.
    elsewhere = Redirect(url="https://example.com/", location="/foo/")
    self.assertFalse(elsewhere.is_append_slash)
12 changes: 10 additions & 2 deletions viewer/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,13 @@ class RedirectSerializer(serializers.ModelSerializer):

class Meta:
    # Redirect output extends the ErrorSerializer field list and CSV header
    # with the same three redirect-specific names, in the same order.
    model = Redirect
    fields = ErrorSerializer.Meta.fields + [
        "redirect_url",
        "is_http_to_https",
        "is_append_slash",
    ]
    csv_header = ErrorSerializer.Meta.csv_header + [
        "redirect_url",
        "is_http_to_https",
        "is_append_slash",
    ]

0 comments on commit 7b8b1e7

Please sign in to comment.