Skip to content

Commit

Permalink
Merge branch 'ab-406-1'
Browse files Browse the repository at this point in the history
  • Loading branch information
alastair committed May 22, 2019
2 parents 0136cfe + 5c1f1db commit cc7677d
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 2 deletions.
89 changes: 89 additions & 0 deletions add_submission_offsets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import print_function

from flask.cli import FlaskGroup
import click
from collections import defaultdict

import db
import webserver

from sqlalchemy import text

# Click command group holding the offset-backfill command defined below.
# Flask's default commands are not added to it, and the application is
# created through webserver.create_app_flaskgroup.
cli = FlaskGroup(add_default_commands=False, create_app=webserver.create_app_flaskgroup)


@cli.command(name='add-offsets')
@click.option(
    "--limit", "-l",
    default=10000,
    # A batch size below 1 is never useful: 0 makes the backfill stop
    # immediately while claiming every row is done, and a negative value
    # would be passed straight to SQL LIMIT.
    type=click.IntRange(min=1),
    show_default=True,
    help="Maximum number of rows to update per batch.",
)
def add_offsets(limit):
    """Update lowlevel submission offsets with a specified limit.

    Rows of the lowlevel table whose submission_offset is NULL are filled in
    batches of at most LIMIT rows; see incremental_add_offset.
    """
    incremental_add_offset(limit)


def incremental_add_offset(limit):
    """Backfill ``lowlevel.submission_offset`` for rows where it is NULL.

    Rows are processed in ascending ``id`` order, at most ``limit`` rows per
    batch, and each batch is written inside its own transaction. Every row
    receives the next unused offset for its ``gid``: one more than the
    largest offset already assigned to that gid, or 0 when the gid has none.
    Progress is reported on stdout.

    Args:
        limit: Maximum number of rows selected and updated per batch.
    """
    # All statements are loop-invariant, so build them once up front.
    size_query = text("""
        SELECT count(*) AS size
          FROM lowlevel
         WHERE submission_offset IS NULL
    """)
    offset_query = text("""
        SELECT gid, MAX(submission_offset)
          FROM lowlevel
         WHERE submission_offset IS NOT NULL
      GROUP BY gid
    """)
    batch_query = text("""
        SELECT id, gid
          FROM lowlevel
         WHERE submission_offset IS NULL
      ORDER BY id
         LIMIT :limit
    """)
    update_query = text("""
        UPDATE lowlevel
           SET submission_offset = :offset
         WHERE id = :id
    """)

    with db.engine.connect() as connection:
        # Number of rows still lacking an offset; used only for progress output.
        table_size = connection.execute(size_query).scalar()

        # Highest offset already stored for each gid. Kept up to date in
        # memory while updating so the table is not re-queried per row.
        max_offsets = {}
        for gid, max_offset in connection.execute(offset_query).fetchall():
            max_offsets[gid] = max_offset

        batch_count = 0
        item_count = 0
        print("Starting batch insertions...")
        print("============================")
        while True:
            # Fetch the rows and test for emptiness rather than relying on
            # rowcount, which is not dependable for SELECT statements.
            batch = connection.execute(batch_query, {"limit": limit}).fetchall()
            if not batch:
                print("Submission offset exists for all items. Exiting...")
                break

            batch_count += 1
            print("Updating batch {}:".format(batch_count))
            with connection.begin():
                for row_id, gid in batch:
                    # First submission of a gid gets 0, later ones max + 1.
                    offset = max_offsets.get(gid, -1) + 1
                    max_offsets[gid] = offset
                    connection.execute(update_query, {"id": row_id, "offset": offset})
                    item_count += 1
            print(" Batch done, inserted {}/{} items...".format(item_count, table_size))
            print("")

        print("============================")
        print("Batch insertions finished.")
3 changes: 2 additions & 1 deletion admin/sql/create_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ CREATE TABLE lowlevel (
build_sha1 TEXT NOT NULL,
lossless BOOLEAN DEFAULT 'n',
submitted TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
gid_type gid_type NOT NULL
gid_type gid_type NOT NULL,
submission_offset INTEGER
);

CREATE TABLE lowlevel_json (
Expand Down
5 changes: 5 additions & 0 deletions admin/updates/20190508-submission-offsets-1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Add the submission_offset column to the lowlevel table.
-- The column is declared without NOT NULL or a default, so rows can hold
-- NULL in it; add_submission_offsets.py selects rows WHERE
-- submission_offset IS NULL and fills them in afterwards.
BEGIN;

ALTER TABLE lowlevel ADD COLUMN submission_offset INTEGER;

COMMIT;
4 changes: 3 additions & 1 deletion manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import db.user
import webserver

import add_submission_offsets

ADMIN_SQL_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'admin', 'sql')

cli = FlaskGroup(add_default_commands=False, create_app=webserver.create_app_flaskgroup)
Expand Down Expand Up @@ -241,7 +243,7 @@ def toggle_site_status():

# Please keep additional sets of commands down there
cli.add_command(db.dump_manage.cli, name="dump")

cli.add_command(add_submission_offsets.cli, name="update-offsets")

if __name__ == '__main__':
cli()

0 comments on commit cc7677d

Please sign in to comment.