-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a JSONB column to the database to make jsonpath queries
Signed-off-by: Aurélien Bompard <[email protected]>
- Loading branch information
Showing
15 changed files
with
409 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import datetime | ||
import logging | ||
|
||
import click | ||
from fedora_messaging.message import load_message as load_message | ||
from sqlalchemy import cast, select, update | ||
from sqlalchemy.dialects import postgresql | ||
|
||
import datanommer.models as m | ||
|
||
from .utils import config_option, get_config | ||
|
||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
@click.command()
@config_option
@click.option(
    "--chunk-size",
    default=30,
    # Only strictly positive values make sense: a zero or negative number of
    # days would produce a time window that never advances towards "now".
    type=click.IntRange(min=1),
    show_default=True,
    help="Go through messages these many days at a time (lower is slower but saves memory).",
)
@click.option(
    "--debug",
    is_flag=True,
    help="Show more information.",
)
def main(config_path, chunk_size, debug):
    """Go over old messages and populate the msg_json field."""
    # NOTE: the docstring above is what click prints for ``--help``, so it is
    # deliberately kept short; implementation notes live in comments instead.
    config = get_config(config_path)
    logging.basicConfig(level=logging.DEBUG if debug else logging.INFO, format="%(message)s")
    m.init(
        config["datanommer_sqlalchemy_url"],
        alembic_ini=config["alembic_ini"],
    )

    # The oldest message that still has no msg_json tells us where to start.
    query = select(m.Message).where(m.Message.msg_json.is_(None))
    first_message = m.session.scalars(query.order_by(m.Message.timestamp).limit(1)).first()
    if first_message is None:
        click.echo("No message to populate.")
        return

    for start_date, end_date in iterate_over_time(
        first_message.timestamp, datetime.timedelta(days=chunk_size)
    ):
        log.debug(
            "Converting messages between %s and %s",
            start_date.date().isoformat(),
            end_date.date().isoformat(),
        )
        # Fill the msg_json column from the contents of the msg_raw column
        query = (
            update(m.Message)
            .where(
                m.Message.msg_json.is_(None),
                m.Message.timestamp >= start_date,
                m.Message.timestamp < end_date,
            )
            .values(msg_json=cast(m.Message.msg_raw, postgresql.JSONB(none_as_null=True)))
        )
        result = m.session.execute(query)
        m.session.commit()
        log.debug("Populated %s rows", result.rowcount)
        # Empty the msg_raw column for the rows whose msg_json is now filled
        query = (
            update(m.Message)
            .where(
                m.Message.msg_json.is_not(None),
                m.Message.timestamp >= start_date,
                m.Message.timestamp < end_date,
            )
            .values(msg_raw=None)
        )
        result = m.session.execute(query)
        # No commit here: iterate_over_time() commits the session when the
        # loop asks it for the next window.
        log.debug("Purged %s rows", result.rowcount)
|
||
|
||
def iterate_over_time(start_at, interval):
    """Yield consecutive ``(start_date, end_date)`` windows from ``start_at`` up to now.

    Each window is ``interval`` long and the last one may extend past the
    current time. A click progress bar is displayed while iterating. After the
    caller has processed a window (i.e. when it asks for the next one), the
    session is committed and emptied with ``expunge_all()``.

    Args:
        start_at: The datetime at which the first window starts.
        interval: A strictly positive ``datetime.timedelta``, the window length.

    Yields:
        Tuples of ``(start_date, end_date)`` datetimes.

    Raises:
        ValueError: If ``interval`` is zero or negative. Being a generator,
            this is raised when iteration starts, not when it is called.
    """
    # A non-positive interval would never move start_date towards "now" and
    # the loop below would run (and allocate) forever.
    if interval <= datetime.timedelta(0):
        raise ValueError(f"interval must be a positive duration, got {interval!r}")

    # Use the same timezone-awareness as start_at: comparing a naive and an
    # aware datetime raises TypeError. With a naive start_at (tzinfo is None)
    # this is exactly datetime.datetime.now().
    # NOTE(review): for naive values this is local time; confirm that it
    # matches how message timestamps are stored.
    now = datetime.datetime.now(tz=start_at.tzinfo)

    # The windows are computed up front so the progress bar knows its length.
    intervals = []
    start_date = start_at
    while start_date < now:
        end_date = start_date + interval
        intervals.append((start_date, end_date))
        start_date = end_date

    total = len(intervals)
    with click.progressbar(length=total) as bar:
        for start_date, end_date in intervals:
            yield start_date, end_date
            # Runs once the caller is done with the window it was given.
            m.session.commit()
            m.session.expunge_all()
            bar.update(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import logging | ||
|
||
import click | ||
from fedora_messaging import config as fedora_messaging_config | ||
from fedora_messaging.message import load_message as load_message | ||
from sqlalchemy import func | ||
|
||
import datanommer.models as m | ||
|
||
|
||
# Go through messages this many at a time | ||
CHUNK_SIZE = 10000 | ||
log = logging.getLogger(__name__) | ||
|
||
|
||
def get_config(config_path=None):
    """Return the ``consumer_config`` section of the Fedora Messaging configuration.

    Args:
        config_path: Optional path of a configuration file to load first.

    Returns:
        The ``consumer_config`` mapping.

    Raises:
        click.ClickException: If ``datanommer_sqlalchemy_url`` or
            ``alembic_ini`` is missing from that section.
    """
    if config_path:
        fedora_messaging_config.conf.load_config(config_path)
    consumer_config = fedora_messaging_config.conf["consumer_config"]

    required_keys = ("datanommer_sqlalchemy_url", "alembic_ini")
    # Report the first required key that is absent, in declaration order.
    missing_key = next((name for name in required_keys if name not in consumer_config), None)
    if missing_key is not None:
        raise click.ClickException(f"{missing_key} not defined in the fedora-messaging config")
    return consumer_config
|
||
|
||
# Reusable ``-c`` / ``--config`` click option: the value is passed to the
# decorated command as ``config_path`` and must be an existing, readable path.
config_option = click.option(
    "-c",
    "--config",
    "config_path",
    type=click.Path(exists=True, readable=True),
    help="Load this Fedora Messaging config file",
)
|
||
|
||
def iterate_over_messages(query, chunk_size):
    """Yield the messages matched by ``query``, loading ``chunk_size`` rows at a time.

    The number of matching rows is counted first and echoed to the user, then
    the rows are fetched page by page with LIMIT/OFFSET while a click progress
    bar is displayed. After each page the session is committed and emptied
    with ``expunge_all()``.

    Args:
        query: A select statement on ``m.Message``.
        chunk_size: Strictly positive number of rows to load per page.

    Yields:
        The matched messages, ordered by timestamp and then id.

    Raises:
        ValueError: If ``chunk_size`` is lower than 1. Being a generator, this
            is raised when iteration starts, not when it is called.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    total = m.session.scalar(query.with_only_columns(func.count(m.Message.id)))
    if not total:
        click.echo("No messages matched.")
        return

    click.echo(f"Considering {total} message{'s' if total > 1 else ''}")

    # OFFSET pagination needs a total order: with the timestamp alone, rows
    # sharing a timestamp could be skipped or repeated between pages, so the
    # id is used as a tie-breaker.
    # NOTE(review): if callers modify rows so that they stop matching `query`,
    # later offsets will skip rows; verify against the callers.
    query = query.order_by(m.Message.timestamp, m.Message.id)
    with click.progressbar(length=total) as bar:
        # One offset per page; no trailing empty page when total is an exact
        # multiple of chunk_size.
        for offset in range(0, total, chunk_size):
            chunk_query = query.limit(chunk_size).offset(offset)
            for message in m.session.scalars(chunk_query):
                bar.update(1)
                yield message
            m.session.commit()
            m.session.expunge_all()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from unittest.mock import Mock | ||
|
||
import pytest | ||
from click.testing import CliRunner | ||
|
||
import datanommer.models as m | ||
from datanommer.commands.populate_json import main as populate_json | ||
|
||
from .utils import generate_bodhi_update_complete_message | ||
|
||
|
||
@pytest.fixture
def bodhi_message_db(datanommer_models):
    """Store a bodhi message whose payload lives in msg_raw and whose msg_json is empty."""
    message = generate_bodhi_update_complete_message()
    m.add(message)
    stored = m.Message.from_msg_id(message.id)
    # Move the payload to the raw column and blank out the JSON column.
    stored.msg_raw = stored.msg_json
    stored.msg_json = None
    m.session.commit()
    m.session.refresh(stored)
    assert stored.msg_json is None
    return stored
|
||
|
||
@pytest.fixture(autouse=True)
def no_expunge(datanommer_models, monkeypatch):
    """Replace the session's ``expunge_all`` and ``expunge`` with mocks for every test."""
    for method_name in ("expunge_all", "expunge"):
        monkeypatch.setattr(m.session, method_name, Mock(name=method_name))
|
||
|
||
def test_populate_json(bodhi_message_db, mock_config, mock_init):
    """The command fills msg_json, empties msg_raw and makes the message searchable."""
    runner = CliRunner()
    result = runner.invoke(populate_json)

    assert result.exit_code == 0, result.output

    m.session.refresh(bodhi_message_db)
    assert bodhi_message_db.msg_json is not None
    assert bodhi_message_db.msg_raw is None
    # The migrated payload must be reachable through a jsonpath search.
    total, _pages, messages = m.Message.grep(jsons=['$.comment.user.name == "dudemcpants"'])
    assert total == 1
    assert messages == [bodhi_message_db]
|
||
|
||
def test_populate_json_no_message(monkeypatch, mock_config, mock_init):
    """With nothing to populate, the command says so and never calls session.execute."""
    execute_mock = Mock(name="execute")
    monkeypatch.setattr(m.session, "execute", execute_mock)

    result = CliRunner().invoke(populate_json)

    assert result.exit_code == 0, result.output
    assert result.output == "No message to populate.\n"
    execute_mock.assert_not_called()
Oops, something went wrong.