Skip to content

Commit

Permalink
Fix: validate that cached files on disk are fresh (#85)
Browse files Browse the repository at this point in the history
If a logged-in user generated a file, LAST_TIME_RENDERED was set,
but the cache on disk is not written. Now if there was an older
version of the cache still on disk, on next request from a visitor
that is not logged-in, the old cache would be served.

Validate that the time set in LAST_TIME_RENDERED is in fact also
the time of the cache-file on disk. If not, the file on disk is
stale and a new page should be rendered.
  • Loading branch information
TrueBrain authored Nov 14, 2020
1 parent 7101592 commit 3601ffc
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 20 deletions.
29 changes: 12 additions & 17 deletions truewiki/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ def page():
RELOAD_BUSY.set()


def _delete_cached_page(page):
if page in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[page]


def translation_callback(wtp, wiki_page, page):
for wikilink in wtp.wikilinks:
if wikilink.target.startswith("Translation:"):
Expand All @@ -56,8 +61,7 @@ def translation_callback(wtp, wiki_page, page):
# Reset the last time rendered for all translations too, as
# otherwise a new translation won't show up on those pages.
for translation in TRANSLATIONS[target]:
if translation in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[translation]
_delete_cached_page(translation)


def category_callback(wtp, wiki_page, page):
Expand All @@ -69,8 +73,7 @@ def category_callback(wtp, wiki_page, page):
CATEGORIES[target].append(page)

# Reset the last time rendered for the category.
if f"Category/{target}" in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[f"Category/{target}"]
_delete_cached_page(f"Category/{target}")


def file_callback(wtp, wiki_page, page):
Expand All @@ -82,8 +85,7 @@ def file_callback(wtp, wiki_page, page):
FILES[target].append(page)

# Reset the last time rendered for the file.
if f"File/{target}" in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[f"File/{target}"]
_delete_cached_page(f"File/{target}")


def links_callback(wtp, wiki_page, page):
Expand Down Expand Up @@ -125,14 +127,10 @@ def template_callback(wtp, wiki_page, page):
def _forget_page(page):
for category in PAGES[page]["categories"]:
CATEGORIES[category].remove(page)

if f"Category/{category}" in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[f"Category/{category}"]
_delete_cached_page(f"Category/{category}")
for file in PAGES[page]["files"]:
FILES[file].remove(page)

if f"File/{file}" in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[f"File/{file}"]
_delete_cached_page(f"File/{file}")
for link in PAGES[page]["links"]:
LINKS[link].remove(page)
for template in PAGES[page]["templates"]:
Expand All @@ -144,9 +142,7 @@ def _forget_page(page):
# otherwise a removed translation will still show up on those pages.
if not translation.startswith(("Category/", "File/", "Template/")):
translation = f"Page/{translation}"

if translation in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[translation]
_delete_cached_page(translation)

PAGES[page]["categories"].clear()
PAGES[page]["files"].clear()
Expand Down Expand Up @@ -200,8 +196,7 @@ async def _page_changed(page, notified=None):
# As we are invalidating this page, also reset when we last rendered it.
# This means that on a next request for this page, browsers will be
# given a new version too.
if page in LAST_TIME_RENDERED:
del LAST_TIME_RENDERED[page]
_delete_cached_page(page)

# Capture the current templates ued. After analysis, this might have
# changed, but those are still pages that need to be analyzed again.
Expand Down
20 changes: 17 additions & 3 deletions truewiki/views/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ def view(user, page: str, if_modified_since) -> web.Response:
# We already rendered this page before. If the browser has it in his
# cache, he can simply reuse that if we haven't rendered since.
response = web.HTTPNotModified()
elif not user and cache_filename and os.path.exists(cache_filename):
elif (
not user
and cache_filename
and os.path.exists(cache_filename)
and os.path.getmtime(cache_filename) >= metadata.LAST_TIME_RENDERED[namespaced_page]
):
# We already rendered this page to disk. Serve from there.
with open(cache_filename) as fp:
body = fp.read()
Expand All @@ -85,14 +90,23 @@ def view(user, page: str, if_modified_since) -> web.Response:

# Never cache anything in the Folder/.
if can_cache:
metadata.LAST_TIME_RENDERED[namespaced_page] = time.time()

if not user and cache_filename:
# Cache the file on disk
os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
with open(cache_filename, "w") as fp:
fp.write(body)

page_time = os.path.getmtime(cache_filename)
else:
# Accuracy of time.time() is higher than getmtime(), so
# depending if we cached, use a different clock.
page_time = time.time()

# Only update the time if we don't have one yet. This makes sure
# that LAST_TIME_RENDERED has the oldest timestamp possible.
if namespaced_page not in metadata.LAST_TIME_RENDERED:
metadata.LAST_TIME_RENDERED[namespaced_page] = page_time

response = web.Response(body=body, content_type="text/html", status=status_code)

# Inform the browser under which rules it can cache this page.
Expand Down

0 comments on commit 3601ffc

Please sign in to comment.