From 0589b6f5c6983c9cf3b23a67b66d360f8cd34156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bompard?= <aurelien@bompard.org>
Date: Thu, 2 May 2024 17:29:56 +0200
Subject: [PATCH] Add a JSONB column to the database to make jsonpath queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Aurélien Bompard <aurelien@bompard.org>
---
 .../datanommer/commands/__init__.py           | 22 +----
 .../datanommer/commands/extract_users.py      | 43 +++------
 .../datanommer/commands/populate_json.py      | 96 +++++++++++++++++++
 .../datanommer/commands/utils.py              | 52 ++++++++++
 datanommer.commands/pyproject.toml            |  1 +
 datanommer.commands/tests/conftest.py         |  2 +-
 datanommer.commands/tests/test_commands.py    | 10 +-
 .../tests/test_extract_users.py               |  3 +-
 .../tests/test_populate_json.py               | 52 ++++++++++
 .../datanommer/models/__init__.py             | 54 ++++++++++-
 .../versions/0ce25211a227_message_msg_raw.py  | 25 +++++
 .../f6918385051f_messages_headers_index.py    | 29 ++++++
 .../versions/f6d590f5c53f_message_msg_json.py | 41 ++++++++
 datanommer.models/news/PR1312.feature         |  1 +
 datanommer.models/tests/test_model.py         | 38 ++++++++
 15 files changed, 409 insertions(+), 60 deletions(-)
 create mode 100644 datanommer.commands/datanommer/commands/populate_json.py
 create mode 100644 datanommer.commands/datanommer/commands/utils.py
 create mode 100644 datanommer.commands/tests/test_populate_json.py
 create mode 100644 datanommer.models/datanommer/models/alembic/versions/0ce25211a227_message_msg_raw.py
 create mode 100644 datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py
 create mode 100644 datanommer.models/datanommer/models/alembic/versions/f6d590f5c53f_message_msg_json.py
 create mode 100644 datanommer.models/news/PR1312.feature

diff --git a/datanommer.commands/datanommer/commands/__init__.py b/datanommer.commands/datanommer/commands/__init__.py
index 481b39bdc..b72fa690e 100644
--- a/datanommer.commands/datanommer/commands/__init__.py
+++ b/datanommer.commands/datanommer/commands/__init__.py
@@ -21,36 +21,18 @@
 from datetime import datetime, timedelta, timezone
 
 import click
-from fedora_messaging import config as fedora_messaging_config
 from sqlalchemy import func, select
 
 import datanommer.models as m
 
+from .utils import config_option, get_config
+
 
 __version__ = importlib.metadata.version("datanommer.commands")
 
 log = logging.getLogger("datanommer")
 
 
-def get_config(config_path=None):
-    if config_path:
-        fedora_messaging_config.conf.load_config(config_path)
-    conf = fedora_messaging_config.conf["consumer_config"]
-    for key in ("datanommer_sqlalchemy_url", "alembic_ini"):
-        if key not in conf:
-            raise click.ClickException(f"{key} not defined in the fedora-messaging config")
-    return conf
-
-
-config_option = click.option(
-    "-c",
-    "--config",
-    "config_path",
-    help="Load this Fedora Messaging config file",
-    type=click.Path(exists=True, readable=True),
-)
-
-
 @click.command()
 @config_option
 def create(config_path):
diff --git a/datanommer.commands/datanommer/commands/extract_users.py b/datanommer.commands/datanommer/commands/extract_users.py
index 63855c89c..238a601ae 100644
--- a/datanommer.commands/datanommer/commands/extract_users.py
+++ b/datanommer.commands/datanommer/commands/extract_users.py
@@ -4,15 +4,13 @@
 import click
 from fedora_messaging.exceptions import ValidationError
 from fedora_messaging.message import load_message as load_message
-from sqlalchemy import and_, func, not_, select
+from sqlalchemy import and_, not_, select
 
 import datanommer.models as m
 
-from . import config_option, get_config
+from .utils import CHUNK_SIZE, config_option, get_config, iterate_over_messages
 
 
-# Go trough messages these at a time
-CHUNK_SIZE = 10000
 log = logging.getLogger(__name__)
 
 SKIP_TOPICS = [
@@ -111,32 +109,17 @@ def main(config_path, topic, category, start, end, force_schema, chunk_size, deb
         isouter=True,
     ).where(m.users_assoc_table.c.msg_id.is_(None))
 
-    total = m.session.scalar(query.with_only_columns(func.count(m.Message.id)))
-    if not total:
-        click.echo("No messages matched.")
-        return
-
-    click.echo(f"Considering {total} message{'s' if total > 1 else ''}")
-
-    query = query.order_by(m.Message.timestamp)
-    with click.progressbar(length=total) as bar:
-        for chunk in range(int(total / chunk_size) + 1):
-            offset = chunk * chunk_size
-            chunk_query = query.limit(chunk_size).offset(offset)
-            for message in m.session.scalars(chunk_query):
-                bar.update(1)
-                usernames = get_usernames(message, force_schema=force_schema)
-                if not usernames:
-                    m.session.expunge(message)
-                    continue
-                message._insert_list(m.User, m.users_assoc_table, usernames)
-                if debug:
-                    click.echo(
-                        f"Usernames for message {message.msg_id} of topic {message.topic}"
-                        f": {', '.join(usernames)}"
-                    )
-            m.session.commit()
-            m.session.expunge_all()
+    for message in iterate_over_messages(query, chunk_size):
+        usernames = get_usernames(message, force_schema=force_schema)
+        if not usernames:
+            m.session.expunge(message)
+            continue
+        message._insert_list(m.User, m.users_assoc_table, usernames)
+        if debug:
+            click.echo(
+                f"Usernames for message {message.msg_id} of topic {message.topic}"
+                f": {', '.join(usernames)}"
+            )
 
 
 def get_usernames(db_message, force_schema):
diff --git a/datanommer.commands/datanommer/commands/populate_json.py b/datanommer.commands/datanommer/commands/populate_json.py
new file mode 100644
index 000000000..ccb90e16d
--- /dev/null
+++ b/datanommer.commands/datanommer/commands/populate_json.py
@@ -0,0 +1,96 @@
+import datetime
+import logging
+
+import click
+from fedora_messaging.message import load_message as load_message
+from sqlalchemy import cast, select, update
+from sqlalchemy.dialects import postgresql
+
+import datanommer.models as m
+
+from .utils import config_option, get_config
+
+
+log = logging.getLogger(__name__)
+
+
+@click.command()
+@config_option
+@click.option(
+    "--chunk-size",
+    default=30,
+    type=int,
+    show_default=True,
+    help="Go through messages these many days at a time (lower is slower but saves memory).",
+)
+@click.option(
+    "--debug",
+    is_flag=True,
+    help="Show more information.",
+)
+def main(config_path, chunk_size, debug):
+    """Go over old messages and populate the msg_json field."""
+    config = get_config(config_path)
+    logging.basicConfig(level=logging.DEBUG if debug else logging.INFO, format="%(message)s")
+    m.init(
+        config["datanommer_sqlalchemy_url"],
+        alembic_ini=config["alembic_ini"],
+    )
+
+    query = select(m.Message).where(m.Message.msg_json.is_(None))
+    first_message = m.session.scalars(query.order_by(m.Message.timestamp).limit(1)).first()
+    if first_message is None:
+        click.echo("No message to populate.")
+        return
+
+    for start_date, end_date in iterate_over_time(
+        first_message.timestamp, datetime.timedelta(days=chunk_size)
+    ):
+        log.debug(
+            "Converting messages between %s and %s",
+            start_date.date().isoformat(),
+            end_date.date().isoformat(),
+        )
+        # Fill the msg_json column from the contents of the msg_raw column
+        query = (
+            update(m.Message)
+            .where(
+                m.Message.msg_json.is_(None),
+                m.Message.timestamp >= start_date,
+                m.Message.timestamp < end_date,
+            )
+            .values(msg_json=cast(m.Message.msg_raw, postgresql.JSONB(none_as_null=True)))
+        )
+        result = m.session.execute(query)
+        m.session.commit()
+        log.debug("Populated %s rows", result.rowcount)
+        # Empty the msg_raw column if msg_json is not filled
+        query = (
+            update(m.Message)
+            .where(
+                m.Message.msg_json.is_not(None),
+                m.Message.timestamp >= start_date,
+                m.Message.timestamp < end_date,
+            )
+            .values(msg_raw=None)
+        )
+        result = m.session.execute(query)
+        log.debug("Purged %s rows", result.rowcount)
+
+
+def iterate_over_time(start_at, interval):
+    intervals = []
+    start_date = start_at
+    now = datetime.datetime.now()
+    while start_date < now:
+        end_date = start_date + interval
+        intervals.append((start_date, end_date))
+        start_date = end_date
+
+    total = len(intervals)
+    with click.progressbar(length=total) as bar:
+        for start_date, end_date in intervals:
+            yield start_date, end_date
+            m.session.commit()
+            m.session.expunge_all()
+            bar.update(1)
diff --git a/datanommer.commands/datanommer/commands/utils.py b/datanommer.commands/datanommer/commands/utils.py
new file mode 100644
index 000000000..1b623b367
--- /dev/null
+++ b/datanommer.commands/datanommer/commands/utils.py
@@ -0,0 +1,52 @@
+import logging
+
+import click
+from fedora_messaging import config as fedora_messaging_config
+from fedora_messaging.message import load_message as load_message
+from sqlalchemy import func
+
+import datanommer.models as m
+
+
+# Go trough messages these many at a time
+CHUNK_SIZE = 10000
+log = logging.getLogger(__name__)
+
+
+def get_config(config_path=None):
+    if config_path:
+        fedora_messaging_config.conf.load_config(config_path)
+    conf = fedora_messaging_config.conf["consumer_config"]
+    for key in ("datanommer_sqlalchemy_url", "alembic_ini"):
+        if key not in conf:
+            raise click.ClickException(f"{key} not defined in the fedora-messaging config")
+    return conf
+
+
+config_option = click.option(
+    "-c",
+    "--config",
+    "config_path",
+    help="Load this Fedora Messaging config file",
+    type=click.Path(exists=True, readable=True),
+)
+
+
+def iterate_over_messages(query, chunk_size):
+    total = m.session.scalar(query.with_only_columns(func.count(m.Message.id)))
+    if not total:
+        click.echo("No messages matched.")
+        return
+
+    click.echo(f"Considering {total} message{'s' if total > 1 else ''}")
+
+    query = query.order_by(m.Message.timestamp)
+    with click.progressbar(length=total) as bar:
+        for chunk in range(int(total / chunk_size) + 1):
+            offset = chunk * chunk_size
+            chunk_query = query.limit(chunk_size).offset(offset)
+            for message in m.session.scalars(chunk_query):
+                bar.update(1)
+                yield message
+            m.session.commit()
+            m.session.expunge_all()
diff --git a/datanommer.commands/pyproject.toml b/datanommer.commands/pyproject.toml
index f5a6c317d..06ba3b225 100644
--- a/datanommer.commands/pyproject.toml
+++ b/datanommer.commands/pyproject.toml
@@ -43,6 +43,7 @@ datanommer-dump = "datanommer.commands:dump"
 datanommer-stats = "datanommer.commands:stats"
 datanommer-latest = "datanommer.commands:latest"
 datanommer-extract-users = "datanommer.commands.extract_users:main"
+datanommer-populate-json = "datanommer.commands.populate_json:main"
 
 
 [build-system]
diff --git a/datanommer.commands/tests/conftest.py b/datanommer.commands/tests/conftest.py
index 4c046f604..54f7e7d86 100644
--- a/datanommer.commands/tests/conftest.py
+++ b/datanommer.commands/tests/conftest.py
@@ -16,7 +16,7 @@ def mock_init(mocker):
 @pytest.fixture
 def mock_config(mocker):
     mocker.patch.dict(
-        datanommer.commands.fedora_messaging_config.conf["consumer_config"],
+        datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"],
         {
             "datanommer_sqlalchemy_url": "",
             "alembic_ini": None,
diff --git a/datanommer.commands/tests/test_commands.py b/datanommer.commands/tests/test_commands.py
index 58c55515c..a532d2631 100644
--- a/datanommer.commands/tests/test_commands.py
+++ b/datanommer.commands/tests/test_commands.py
@@ -29,7 +29,7 @@
 
 def test_get_datanommer_sqlalchemy_url_keyerror(mocker):
     mocker.patch.dict(
-        datanommer.commands.fedora_messaging_config.conf["consumer_config"],
+        datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"],
         {},
         clear=True,
     )
@@ -42,9 +42,11 @@ def test_get_datanommer_sqlalchemy_url_config(mocker):
         "datanommer_sqlalchemy_url": "",
         "alembic_ini": "/some/where",
     }
-    mocker.patch.dict(datanommer.commands.fedora_messaging_config.conf["consumer_config"], conf)
+    mocker.patch.dict(
+        datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"], conf
+    )
     load_config = mocker.patch(
-        "datanommer.commands.fedora_messaging_config.conf.load_config",
+        "datanommer.commands.utils.fedora_messaging_config.conf.load_config",
     )
     datanommer.commands.get_config("some-path")
     load_config.assert_called_with("some-path")
@@ -53,7 +55,7 @@ def test_get_datanommer_sqlalchemy_url_config(mocker):
 def test_create(mocker):
     mock_model_init = mocker.patch("datanommer.commands.m.init")
     mocker.patch.dict(
-        datanommer.commands.fedora_messaging_config.conf["consumer_config"],
+        datanommer.commands.utils.fedora_messaging_config.conf["consumer_config"],
         {
             "datanommer_sqlalchemy_url": "TESTURL",
             "alembic_ini": "/some/where",
diff --git a/datanommer.commands/tests/test_extract_users.py b/datanommer.commands/tests/test_extract_users.py
index a3b4a0482..f5590f57b 100644
--- a/datanommer.commands/tests/test_extract_users.py
+++ b/datanommer.commands/tests/test_extract_users.py
@@ -171,7 +171,8 @@ def test_extract_force_schema(bodhi_message_db, mock_config, mock_init):
 
 
 def test_extract_invalid_message(bodhi_message_db, mock_config, mock_init):
-    bodhi_message_db.msg = "this is invalid"
+    bodhi_message_db.msg_raw = "this is invalid"
+    bodhi_message_db.msg_json = None
     m.session.commit()
 
     runner = CliRunner()
diff --git a/datanommer.commands/tests/test_populate_json.py b/datanommer.commands/tests/test_populate_json.py
new file mode 100644
index 000000000..e0559f2f2
--- /dev/null
+++ b/datanommer.commands/tests/test_populate_json.py
@@ -0,0 +1,52 @@
+from unittest.mock import Mock
+
+import pytest
+from click.testing import CliRunner
+
+import datanommer.models as m
+from datanommer.commands.populate_json import main as populate_json
+
+from .utils import generate_bodhi_update_complete_message
+
+
+@pytest.fixture
+def bodhi_message_db(datanommer_models):
+    msg = generate_bodhi_update_complete_message()
+    m.add(msg)
+    msg_in_db = m.Message.from_msg_id(msg.id)
+    msg_in_db.msg_raw = msg_in_db.msg_json
+    msg_in_db.msg_json = None
+    m.session.commit()
+    m.session.refresh(msg_in_db)
+    assert msg_in_db.msg_json is None
+    return msg_in_db
+
+
+@pytest.fixture(autouse=True)
+def no_expunge(datanommer_models, monkeypatch):
+    monkeypatch.setattr(m.session, "expunge_all", Mock(name="expunge_all"))
+    monkeypatch.setattr(m.session, "expunge", Mock(name="expunge"))
+
+
+def test_populate_json(bodhi_message_db, mock_config, mock_init):
+    runner = CliRunner()
+    result = runner.invoke(populate_json)
+
+    assert result.exit_code == 0, result.output
+
+    m.session.refresh(bodhi_message_db)
+    print(bodhi_message_db.msg_json)
+    assert bodhi_message_db.msg_json is not None
+    assert bodhi_message_db.msg_raw is None
+    total, _pages, _messages = m.Message.grep(jsons=['$.comment.user.name == "dudemcpants"'])
+    assert total == 1
+    assert _messages == [bodhi_message_db]
+
+
+def test_populate_json_no_message(monkeypatch, mock_config, mock_init):
+    monkeypatch.setattr(m.session, "execute", Mock(name="execute"))
+    runner = CliRunner()
+    result = runner.invoke(populate_json)
+    assert result.exit_code == 0, result.output
+    assert result.output == "No message to populate.\n"
+    m.session.execute.assert_not_called()
diff --git a/datanommer.models/datanommer/models/__init__.py b/datanommer.models/datanommer/models/__init__.py
index fcf8309f0..1460f5464 100644
--- a/datanommer.models/datanommer/models/__init__.py
+++ b/datanommer.models/datanommer/models/__init__.py
@@ -25,6 +25,7 @@
 from sqlalchemy import (
     and_,
     between,
+    cast,
     Column,
     create_engine,
     DateTime,
@@ -32,6 +33,7 @@
     event,
     ForeignKey,
     func,
+    Index,
     Integer,
     not_,
     or_,
@@ -156,7 +158,7 @@ def add(message):
         msg_id=message.id,
         topic=message.topic,
         timestamp=sent_at,
-        msg=message.body,
+        msg_json=message.body,
         headers=headers,
         users=usernames,
         packages=packages,
@@ -213,7 +215,15 @@ def coerce_compared_value(self, op, value):
 
 class Message(DeclarativeBase):
     __tablename__ = "messages"
-    __table_args__ = (UniqueConstraint("msg_id", "timestamp"),)
+    __table_args__ = (
+        UniqueConstraint("msg_id", "timestamp"),
+        Index(
+            "json_root",
+            "msg_json",
+            postgresql_using="gin",
+            postgresql_ops={"msg_json": "jsonb_path_ops"},
+        ),
+    )
 
     id = Column(Integer, primary_key=True, autoincrement=True)
     msg_id = Column(Unicode, nullable=True, default=None, index=True)
@@ -227,7 +237,8 @@ class Message(DeclarativeBase):
     crypto = Column(UnicodeText)
     source_name = Column(Unicode, default="datanommer")
     source_version = Column(Unicode, default=lambda context: __version__)
-    msg = Column(_JSONEncodedDict, nullable=False)
+    msg_raw = Column(_JSONEncodedDict, nullable=True)
+    msg_json = Column(postgresql.JSONB(none_as_null=True), nullable=True)
     headers = Column(postgresql.JSONB(none_as_null=True))
     users = relationship(
         "User",
@@ -248,6 +259,10 @@ class Message(DeclarativeBase):
         ),
     )
 
+    @property
+    def msg(self):
+        return self.msg_json if self.msg_json is not None else self.msg_raw
+
     @validates("topic")
     def get_category(self, key, topic):
         """Update the category when the topic is set.
@@ -373,6 +388,8 @@ def make_query(
         topics=None,
         not_topics=None,
         contains=None,
+        jsons=None,
+        jsons_and=None,
     ):
         """Flexible query interface for messages.
 
@@ -400,6 +417,16 @@ def make_query(
 
             (user == 'ralph') AND
             NOT (category == 'bodhi' OR category == 'wiki')
+
+        ----
+
+        The ``jsons`` argument is a list of jsonpath filters, please refer to
+        `PostgreSQL's documentation
+        <https://www.postgresql.org/docs/current/functions-json.html#FUNCTIONS-SQLJSON-PATH>`_
+        on the matter to learn how to build the jsonpath expression.
+
+        The ``jsons_and`` argument is similar to the ``jsons`` argument, but all
+        the values must match for a message to be returned.
         """
 
         users = users or []
@@ -411,6 +438,8 @@ def make_query(
         topics = topics or []
         not_topics = not_topics or []
         contains = contains or []
+        jsons = jsons or []
+        jsons_and = jsons_and or []
 
         Message = cls
         query = select(Message)
@@ -442,7 +471,19 @@ def make_query(
             query = query.where(or_(*(Message.topic == topic for topic in topics)))
 
         if contains:
-            query = query.where(or_(*(Message.msg.like(f"%{contain}%") for contain in contains)))
+            query = query.where(
+                or_(
+                    *(
+                        cast(Message.msg_json, UnicodeText).like(f"%{contain}%")
+                        for contain in contains
+                    )
+                )
+            )
+
+        if jsons:
+            query = query.where(or_(*(Message.msg_json.path_match(j) for j in jsons)))
+        if jsons_and:
+            query = query.where(and_(*(Message.msg_json.path_match(j) for j in jsons_and)))
 
         # And then the four negative filters as necessary
         if not_users:
@@ -507,6 +548,11 @@ def grep(
 
         The ``jsons_and`` argument is similar to the ``jsons`` argument, but all
         the values must match for a message to be returned.
+
+        ----
+
+        If the `defer` argument evaluates to True, the query won't actually
+        be executed, but a SQLAlchemy query object returned instead.
         """
         query = cls.make_query(**kwargs)
         # Finally, tag on our pagination arguments
diff --git a/datanommer.models/datanommer/models/alembic/versions/0ce25211a227_message_msg_raw.py b/datanommer.models/datanommer/models/alembic/versions/0ce25211a227_message_msg_raw.py
new file mode 100644
index 000000000..a8d3d1497
--- /dev/null
+++ b/datanommer.models/datanommer/models/alembic/versions/0ce25211a227_message_msg_raw.py
@@ -0,0 +1,25 @@
+"""Rename Message.msg to Message.msg_raw.
+
+This is backwards-incompatible.
+
+Revision ID: 0ce25211a227
+Revises: f6d590f5c53f
+Create Date: 2024-05-16 14:47:47.180323
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "0ce25211a227"
+down_revision = "f6d590f5c53f"
+
+
+def upgrade():
+    # Rename the msg column to msg_raw
+    op.alter_column("messages", "msg", new_column_name="msg_raw")
+
+
+def downgrade():
+    op.alter_column("messages", "msg_raw", new_column_name="msg")
diff --git a/datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py b/datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py
new file mode 100644
index 000000000..2b5972a1c
--- /dev/null
+++ b/datanommer.models/datanommer/models/alembic/versions/f6918385051f_messages_headers_index.py
@@ -0,0 +1,29 @@
+"""Messages.headers index
+
+Revision ID: f6918385051f
+Revises: 951c40020acc
+Create Date: 2024-05-07 16:05:05.344863
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "f6918385051f"
+down_revision = "951c40020acc"
+
+
+def upgrade():
+    op.create_index(
+        "ix_messages_headers",
+        "messages",
+        ["headers"],
+        unique=False,
+        postgresql_using="gin",
+        postgresql_ops={"msg_json": "jsonb_path_ops"},
+    )
+
+
+def downgrade():
+    op.drop_index("ix_messages_headers", table_name="messages", postgresql_using="gin")
diff --git a/datanommer.models/datanommer/models/alembic/versions/f6d590f5c53f_message_msg_json.py b/datanommer.models/datanommer/models/alembic/versions/f6d590f5c53f_message_msg_json.py
new file mode 100644
index 000000000..40d3d53d0
--- /dev/null
+++ b/datanommer.models/datanommer/models/alembic/versions/f6d590f5c53f_message_msg_json.py
@@ -0,0 +1,41 @@
+"""Create the Message.msg_json column and index it.
+
+Revision ID: f6d590f5c53f
+Revises: f6918385051f
+Create Date: 2024-05-02 14:38:38.399397
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+
+# revision identifiers, used by Alembic.
+revision = "f6d590f5c53f"
+down_revision = "f6918385051f"
+
+
+def upgrade():
+    op.add_column(
+        "messages",
+        sa.Column(
+            "msg_json", postgresql.JSONB(none_as_null=True, astext_type=sa.Text()), nullable=True
+        ),
+    )
+    op.create_index(
+        "ix_messages_msg_json_root",
+        "messages",
+        ["msg_json"],
+        unique=False,
+        postgresql_using="gin",
+        postgresql_ops={"msg_json": "jsonb_path_ops"},
+    )
+    # Make the msg column nullable so that the populate-json script can run
+    op.alter_column("messages", "msg", nullable=True)
+
+
+def downgrade():
+    op.alter_column("messages", "msg", nullable=False)
+    op.drop_index("ix_messages_msg_json_root", table_name="messages", postgresql_using="gin")
+    op.drop_column("messages", "msg_json")
diff --git a/datanommer.models/news/PR1312.feature b/datanommer.models/news/PR1312.feature
new file mode 100644
index 000000000..d7a4fc74f
--- /dev/null
+++ b/datanommer.models/news/PR1312.feature
@@ -0,0 +1 @@
+Add a JSONB column to the database to make jsonpath queries
\ No newline at end of file
diff --git a/datanommer.models/tests/test_model.py b/datanommer.models/tests/test_model.py
index 786a588e8..f93fac797 100644
--- a/datanommer.models/tests/test_model.py
+++ b/datanommer.models/tests/test_model.py
@@ -428,6 +428,44 @@ def test_grep_contains(datanommer_models):
     assert r[0].msg == example_message.body
 
 
+def test_grep_jsons(datanommer_models):
+    example_message = generate_bodhi_update_complete_message()
+    dm.add(example_message)
+    dm.session.flush()
+    t, p, r = dm.Message.grep(jsons=['$.comment.update.status == "pending"'])
+    assert t == 1
+    assert p == 1
+    assert len(r) == 1
+    assert r[0].msg == example_message.body
+
+
+def test_grep_jsons_and_all_match(datanommer_models):
+    example_message = generate_bodhi_update_complete_message()
+    dm.add(example_message)
+    dm.session.flush()
+    t, p, r = dm.Message.grep(
+        jsons_and=['$.comment.update.status == "pending"', '$.comment.user.name == "dudemcpants"']
+    )
+    assert t == 1
+    assert p == 1
+    assert len(r) == 1
+    assert r[0].msg == example_message.body
+
+
+def test_grep_jsons_and_one_match(datanommer_models):
+    example_message = generate_bodhi_update_complete_message()
+    dm.add(example_message)
+    dm.session.flush()
+    t, p, r = dm.Message.grep(
+        jsons_and=[
+            '$.comment.update.status == "pending"',
+            '$.comment.user.name == "does-not-match"',
+        ]
+    )
+    assert t == 0
+    assert len(r) == 0
+
+
 def test_grep_rows_per_page(datanommer_models, add_200_messages):
     total, pages, messages = dm.Message.grep()
     assert total == 200