Skip to content

Commit

Permalink
Fix tests, refactor updates (more parallelism too). See CHANGELOG.md
Browse files Browse the repository at this point in the history
  • Loading branch information
bjonnh committed Dec 17, 2023
1 parent dd15c5d commit d0a4ec4
Show file tree
Hide file tree
Showing 27 changed files with 537 additions and 569 deletions.
29 changes: 28 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,35 @@ jobs:
poetry install
chmod +x ./update.py
poetry run ./update.py
- name: Run tests
run: |
poetry run pytest
- name: Clean up unnecessary files
if: always()
run: |
rm -rf data/*.csv data/*.db
tests:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Load cached Poetry installation
id: cached-poetry
uses: actions/cache@v3
with:
path: ~/.local
key: poetry-0
- name: Install Poetry
if: steps.cached-poetry.outputs.cache-hit != 'true'
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
- name: Run tests
run: |
poetry install
poetry run pytest
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
2023-12-16
- Fix tests
- Refactor the download of CSVs
- Parallelize the download of CSVs
- Add parameters to the update script
You can run it with `--only xxx`, `--stop xxx` or `--skip xxx` to:
- only execute xxx
- stop before xxx
- skip xxx
where xxx can be a task or a group name.
Task/group descriptions can be listed with `--list`.
- Fix a potential abuse in `generate_database_taxo.py` if a taxon contains a comma
File renamed without changes.
447 changes: 219 additions & 228 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "lotus_search"
version = "0.1.2"
description = ""
authors = ["Jonathan Bisson <[email protected]>"]
authors = ["Jonathan Bisson <[email protected]>", "Adriano Rutz <[email protected]>"]
packages = [{include = "*.py", from = ""}]
readme = "README.md"

Expand All @@ -17,14 +17,16 @@ gunicorn = "^21.2.0"
orjson = "^3.9.10"
pandas = "^2.1.3"
pydantic = "^2.5.1"
pytest = "^6.0.0"
pytest-mock = "*"
requests = "^2.31.0"
requests_mock = "*"
rdkit = "^2023.9.2"
sqlalchemy = "^2.0.23"
uvicorn = "^0.23.2"

[tool.poetry.dev-dependencies]
pytest = "^6.0.0"
pytest-mock = "*"
requests_mock = "*"

[tool.poetry.scripts]
start = "app:main"

Expand Down
39 changes: 19 additions & 20 deletions tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
import pytest
import requests_mock

from update.common import remove_wd_entity_prefix, wd_sparql_to_csv
from update.common import remove_wd_entity_prefix, sparql_to_csv


def test_wd_sparql_to_csv_returns_expected_csv():
with requests_mock.Mocker() as m:
m.get("https://query.wikidata.org/sparql", text="expected_csv")
result = wd_sparql_to_csv("query")
assert result == "expected_csv"
class TestWdSparqlToCsv:
def test_returns_expected_csv(self):
with requests_mock.Mocker() as m:
m.get("https://query.wikidata.org/sparql", text="expected_csv")
result = sparql_to_csv("query")
assert result == "expected_csv"

def test_uses_provided_url(self):
with requests_mock.Mocker() as m:
m.get("https://other.url/sparql", text="expected_csv")
result = sparql_to_csv("query", "https://other.url/sparql")
assert result == "expected_csv"

def test_wd_sparql_to_csv_uses_provided_url():
with requests_mock.Mocker() as m:
m.get("https://other.url/sparql", text="expected_csv")
result = wd_sparql_to_csv("query", "https://other.url/sparql")
assert result == "expected_csv"

class TestRemoveWdEntityPrefix:
def test_removes_prefix(self):
result = remove_wd_entity_prefix("http://www.wikidata.org/entity/Q123")
assert result == "123"

def test_remove_wd_entity_prefix_removes_prefix():
result = remove_wd_entity_prefix("http://www.wikidata.org/entity/Q123")
assert result == "123"


def test_remove_wd_entity_prefix_does_not_remove_other_text():
result = remove_wd_entity_prefix("http://www.wikidata.org/entity/Q123/other")
assert result == "123/other"
def test_does_not_remove_other_text(self):
result = remove_wd_entity_prefix("http://www.wikidata.org/entity/Q123/other")
assert result == "123/other"
32 changes: 0 additions & 32 deletions tests/test_download_couples_referenced.py

This file was deleted.

32 changes: 0 additions & 32 deletions tests/test_download_doi.py

This file was deleted.

28 changes: 28 additions & 0 deletions tests/test_download_query_as_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from unittest.mock import patch

import pytest

from update.download_query_as_csv import run


class TestRunQueryToCSV:
    """Tests for ``run``: retrying after a timeout payload and writing the output file."""

    @pytest.fixture(autouse=True)
    def setup(self, tmp_path):
        """Write a small SPARQL query file and choose an output path under ``tmp_path``."""
        query_path = tmp_path / "query.sparql"
        query_path.write_text("SELECT ?item WHERE {?item wdt:P31 wd:Q5.} LIMIT 1")
        self.query_file = query_path
        self.output_file = tmp_path / "output.csv"

    def test_retries_on_timeout(self):
        """The first mocked response is a timeout string, so a second call is expected."""
        responses = ['java.util.concurrent.TimeoutException', 'valid result']
        with patch('update.download_query_as_csv.sparql_to_csv') as fake_query:
            fake_query.side_effect = responses
            run(self.query_file, self.output_file)
            assert fake_query.call_count == 2
            assert self.output_file.read_text() == 'valid result'

    def test_writes_expected_result(self):
        """The output file holds whatever the mocked prefix-removal step returns."""
        query_target = 'update.download_query_as_csv.sparql_to_csv'
        prefix_target = 'update.download_query_as_csv.remove_wd_entity_prefix'
        with patch(query_target, return_value='valid result'):
            with patch(prefix_target, return_value='expected result'):
                run(self.query_file, self.output_file)
                assert self.output_file.read_text() == 'expected result'
32 changes: 0 additions & 32 deletions tests/test_download_smiles.py

This file was deleted.

32 changes: 0 additions & 32 deletions tests/test_download_taxonomy_parenting.py

This file was deleted.

6 changes: 3 additions & 3 deletions tests/test_generate_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@

import pytest

from update.generate_database import run
from update import generate_database


@patch("update.generate_database.pickle.dump")
@patch("update.generate_database.pickle.load")
@patch("update.generate_database.open", new_callable=mock_open)
def test_run_loads_and_dumps_database(mock_open, mock_pickle_load, mock_pickle_dump):
mock_pickle_load.return_value = {"key": "value"}
run(Path("."))
generate_database.run()
assert mock_pickle_load.call_count == 2
assert mock_pickle_dump.call_count == 1

Expand All @@ -26,7 +26,7 @@ def test_run_updates_database_with_loaded_data(
{"chemo_key": "chemo_value"},
{"taxo_key": "taxo_value"},
]
run(Path("."))
generate_database.run()
mock_pickle_dump.assert_called_once_with(
{"chemo_key": "chemo_value", "taxo_key": "taxo_value"},
mock_open.return_value.__enter__.return_value,
Expand Down
10 changes: 6 additions & 4 deletions tests/test_generate_database_chemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from update.generate_database_chemo import process_smiles, run
from update import generate_database_chemo


@patch("update.generate_database_chemo.ProcessPoolExecutor")
Expand All @@ -21,6 +21,7 @@ def test_run_generates_database(
(
0,
"smiles",
"smol",
"smiles_clean",
"sim_fp",
"sub_fp",
Expand All @@ -30,7 +31,7 @@ def test_run_generates_database(
)
]
)
run(Path("."))
generate_database_chemo.run()
assert mock_pickle_dump.call_count == 1


Expand All @@ -54,10 +55,11 @@ def test_process_smiles_returns_expected_result_on_success(
mock_fingerprint.return_value = "sim_fp"
mock_pattern_fp.return_value = "sub_fp"
mock_mol.return_value.ToBinary.return_value = "mol_h"
result = process_smiles((0, "smiles"))
result = generate_database_chemo.process_smiles((0, "smiles"))
assert result == (
0,
"smiles",
"smol",
"smiles_clean",
"sim_fp",
"sub_fp",
Expand All @@ -68,5 +70,5 @@ def test_process_smiles_returns_expected_result_on_success(


def test_process_smiles_returns_none_on_failure():
result = process_smiles((0, "invalid_smiles"))
result = generate_database_chemo.process_smiles((0, "invalid_smiles"))
assert result is None
Loading

0 comments on commit d0a4ec4

Please sign in to comment.