Skip to content

Commit

Permalink
Add: produce a sitemap on request and tell in robots.txt we have that
Browse files Browse the repository at this point in the history
This should improve the speed at which search engines pick up new pages.
  • Loading branch information
TrueBrain committed Nov 22, 2020
1 parent 48032a5 commit 8091548
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 3 deletions.
2 changes: 2 additions & 0 deletions truewiki/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
SESSION_COOKIE_NAME,
)
from .views.page import click_page
from .views.sitemap import click_sitemap
from .web_routes import (
click_web_routes,
routes,
Expand Down Expand Up @@ -104,6 +105,7 @@ async def wait_for_storage():
@click_user_session
@click_user_github
@click_page
@click_sitemap
@click.option("--validate-all", help="Validate all mediawiki files and report all errors", is_flag=True)
def main(bind, port, storage, validate_all):
log.info("Reload storage ..")
Expand Down
3 changes: 3 additions & 0 deletions truewiki/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
config,
singleton,
)
from .views import sitemap
from .wiki_page import WikiPage

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -340,6 +341,8 @@ def _post(self):

PAGES_LC[page.lower()] = page

sitemap.invalidate_cache()

async def page_changed(self):
for page in self.pages:
await _page_changed(page)
Expand Down
86 changes: 86 additions & 0 deletions truewiki/views/sitemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import click
import os
import urllib.parse

from aiohttp import web
from openttd_helpers import click_helper

from . import page as page_view
from .. import metadata

FRONTEND_URL = None


def _render_sitemap() -> str:
    """Render the sitemap XML document for every entry in ``metadata.PAGES``.

    Each page becomes one ``<url>`` element whose ``<loc>`` is the frontend
    URL followed by the URL-quoted page name (with a leading ``Page/``
    removed). When a page lists exactly one entry under ``"translations"``
    and ``metadata.TRANSLATIONS`` holds more than one page for that entry,
    an ``<xhtml:link rel="alternate">`` element is added per translation.

    Returns:
        The complete XML document as a string, terminated by a newline.
    """
    # Collect lines and join once; repeated ``+=`` on a string inside nested
    # loops can degrade to quadratic behaviour.
    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:xhtml="http://www.w3.org/1999/xhtml">',
    ]

    for page, page_data in metadata.PAGES.items():
        if page.startswith("Page/"):
            page = page[len("Page/") :]
        page = urllib.parse.quote(page)

        lines.append("<url>")
        lines.append(f"<loc>{FRONTEND_URL}/{page}</loc>")

        translations = page_data["translations"]
        if len(translations) == 1:
            en_page = translations[0]
            alternates = metadata.TRANSLATIONS[en_page]

            # A single alternate would only point back at the page itself.
            if len(alternates) > 1:
                for translation in alternates:
                    # The language is the second path component, whether or
                    # not the name carries the "Page/" prefix.
                    language = translation.split("/")[1]
                    if translation.startswith("Page/"):
                        translation = translation[len("Page/") :]

                    translation = urllib.parse.quote(translation)
                    lines.append(
                        f'<xhtml:link rel="alternate" hreflang="{language}" '
                        f'href="{FRONTEND_URL}/{translation}" />'
                    )

        lines.append("</url>")

    lines.append("</urlset>")
    return "\n".join(lines) + "\n"


def view() -> web.Response:
    """Return the sitemap as an ``application/xml`` response.

    The document is read from ``<CACHE_PAGE_FOLDER>/sitemap.xml`` when a
    cache folder is configured and the file exists; otherwise it is rendered
    and, if a cache folder is configured, stored there for the next request.

    Raises:
        web.HTTPNotFound: when no frontend URL was configured, as absolute
            links cannot be produced without it.
    """
    if FRONTEND_URL is None:
        raise web.HTTPNotFound

    if page_view.CACHE_PAGE_FOLDER:
        cache_filename = f"{page_view.CACHE_PAGE_FOLDER}/sitemap.xml"
    else:
        cache_filename = None

    body = None
    if cache_filename:
        # Try the read directly instead of exists()-then-open(): the cache
        # file can be removed between the two calls.
        try:
            # The document declares UTF-8, so do not rely on the platform
            # default encoding.
            with open(cache_filename, encoding="utf-8") as fp:
                body = fp.read()
        except FileNotFoundError:
            body = None

    if body is None:
        body = _render_sitemap()

        if cache_filename:
            # Store in cache for next time it is requested.
            with open(cache_filename, "w", encoding="utf-8") as fp:
                fp.write(body)

    return web.Response(body=body, content_type="application/xml")


def invalidate_cache() -> None:
    """Remove the cached ``sitemap.xml`` so the next request rebuilds it.

    Does nothing when no cache folder is configured or when the cache file
    does not exist.
    """
    # Without a cache folder the f-string below would produce the bogus
    # relative path "None/sitemap.xml"; view() never writes a cache then.
    if not page_view.CACHE_PAGE_FOLDER:
        return

    cache_filename = f"{page_view.CACHE_PAGE_FOLDER}/sitemap.xml"

    # Unlink directly rather than checking existence first, so a file that
    # disappears in between is not an error.
    try:
        os.unlink(cache_filename)
    except FileNotFoundError:
        pass


@click_helper.extend
@click.option(
    "--frontend-url",
    help="URL of the frontend, used for creating absolute links in the sitemap.xml",
)
def click_sitemap(frontend_url):
    """Store the ``--frontend-url`` option in the module-level ``FRONTEND_URL``.

    Args:
        frontend_url: Value of ``--frontend-url``; ``None`` when the option
            was not given, in which case ``view()`` answers with a 404.
    """
    global FRONTEND_URL

    # view() joins FRONTEND_URL and the page name with "/", so a trailing
    # slash here would yield "//" in every generated link.
    FRONTEND_URL = frontend_url.rstrip("/") if frontend_url else frontend_url
19 changes: 16 additions & 3 deletions truewiki/web_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
)
from .views import (
edit,
license as license_page,
license,
login,
source,
page as view_page,
preview,
sitemap,
)
from .user_session import (
SESSION_COOKIE_NAME,
Expand Down Expand Up @@ -111,9 +112,21 @@ async def healthz_handler(request):

@routes.get("/License")
@csp_header
async def license(request):
async def license_page(request):
user = get_user_by_bearer(request.cookies.get(SESSION_COOKIE_NAME))
return license_page.view(user)
return license.view(user)


@routes.get("/sitemap.xml")
@csp_header
async def sitemap_page(request):
    """Handle GET /sitemap.xml by returning whatever the sitemap view produces."""
    response = sitemap.view()
    return response


@routes.get("/robots.txt")
@csp_header
async def robots(request):
    """Handle GET /robots.txt and advertise the sitemap when one is served.

    The ``Sitemap:`` line is only emitted when a frontend URL is configured:
    the directive has to carry a fully qualified URL, and without a frontend
    URL the sitemap view answers with a 404 anyway.
    """
    lines = ["User-agent: *"]

    if sitemap.FRONTEND_URL is not None:
        lines.append(f"Sitemap: {sitemap.FRONTEND_URL}/sitemap.xml")

    return web.Response(body="\n".join(lines), content_type="text/plain")


@routes.get("/edit/{page:.*}")
Expand Down

0 comments on commit 8091548

Please sign in to comment.