diff --git a/README.md b/README.md index c47bca2e8..62af25e83 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ This method uses Docker to run the complete application stack. > **Note** > When running locally, you may need to update one of the ports in the `.env` file if it conflicts with another application on your machine. -3. Build and run the project with `docker-compose build && docker-compose up -d && docker-compose logs -f` +3. Build and run the project with `docker compose build && docker compose up -d && docker compose logs -f` ## Installation (Frontend Only) @@ -57,7 +57,7 @@ You'll need to replace `police-data-trust-api-1` with the name of the container docker container ls CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES c0cf******** police-data-trust-api "/bin/sh -c '/wait &…" About a minute ago Up About a minute 0.0.0.0:5001->5001/tcp police-data-trust-api-1 -5e6f******** postgres:16.1 "docker-entrypoint.s…" 3 days ago Up About a minute 0.0.0.0:5432->5432/tcp police-data-trust-db-1 +5e6f******** postgres:16 "docker-entrypoint.s…" 3 days ago Up About a minute 0.0.0.0:5432->5432/tcp police-data-trust-db-1 dacd******** police-data-trust-web "docker-entrypoint.s…" 3 days ago Up About a minute 0.0.0.0:3000->3000/tcp police-data-trust-web-1 ``` @@ -65,7 +65,7 @@ dacd******** police-data-trust-web "docker-entrypoint.s…" 3 days ago The current backend tests can be found in the GitHub Actions workflow file [python-tests.yml](https://github.com/codeforboston/police-data-trust/blob/0488d03c2ecc01ba774cf512b1ed2f476441948b/.github/workflows/python-tests.yml) -To run the tests locally, first start the application with docker-compose. Then open up a command line interface to the running container: +To run the tests locally, first start the application with docker compose. 
Then open up a command line interface to the running container: ``` docker exec -it "police-data-trust-api-1" /bin/bash @@ -82,7 +82,7 @@ python -m pytest The current frontend tests can be found in the GitHub Actions workflow file [frontend-checks.yml](https://github.com/codeforboston/police-data-trust/blob/0488d03c2ecc01ba774cf512b1ed2f476441948b/.github/workflows/frontend-checks.yml) -To run the tests locally, first start the application with docker-compose. Then open up a command line interface to the running container: +To run the tests locally, first start the application with docker compose. Then open up a command line interface to the running container: ``` docker exec -it "police-data-trust-web-1" /bin/bash diff --git a/backend/Dockerfile.cloud b/backend/Dockerfile.cloud index a64b8dc0d..649dacced 100644 --- a/backend/Dockerfile.cloud +++ b/backend/Dockerfile.cloud @@ -15,7 +15,7 @@ RUN arch=$(arch) && \ file=pandas-2.2.2-cp312-cp312-manylinux_2_17_${arch}.manylinux2014_${arch}.whl && \ url="https://pypi.debian.net/pandas/${file}" && \ wget ${url} && \ - sed -i "s/pandas==1.5.3/${file}/" prod.txt + sed -i "s/pandas==2.2.2/${file}/" prod.txt RUN pip install --no-cache-dir -r prod.txt COPY . . 
class Importer:
    """Poll an SQS queue for S3 event notifications and download each object.

    Every SQS message body is parsed as JSON and is expected to carry a
    ``Records`` list whose entries name an S3 bucket and object key. Each
    referenced object is downloaded into memory; running the actual import
    on that content is still a TODO (see ``_process_message``).

    NOTE(review): this module is added under ``backend/import/``. ``import``
    is a reserved word, so the package cannot be loaded with a plain
    ``import`` statement -- consider renaming the directory.
    """

    # Seconds to idle after a poll that returned no messages.
    # NOTE(review): the original comment said "5m" while the code slept for
    # 600 seconds (10 minutes). The value is kept unchanged -- confirm which
    # interval was intended.
    EMPTY_POLL_SLEEP_SECONDS = 600

    # Seconds a received message stays hidden from other consumers while it
    # is being processed here.
    VISIBILITY_TIMEOUT_SECONDS = 600

    def __init__(self, queue_name: str, region: str = "us-east-1"):
        """Create the AWS clients and resolve the queue URL.

        Args:
            queue_name: Name of the SQS queue to poll.
            region: AWS region used for both the SQS and S3 clients.
        """
        self.queue_name = queue_name
        self.session = boto3.Session(region_name=region)
        self.sqs_client = self.session.client("sqs")
        self.s3_client = self.session.client("s3")
        # get_queue_url returns a response dict; the URL string that
        # receive_message/delete_message need is under "QueueUrl".
        self.sqs_queue_url = self.sqs_client.get_queue_url(
            QueueName=self.queue_name
        )["QueueUrl"]
        self.logger = getLogger(self.__class__.__name__)

    def run(self):
        """Poll the queue forever, handling one message per receive call.

        A message is deleted from the queue only after every record in it
        was processed without error. If processing fails the error is
        logged and the message is left in place, so SQS redelivers it once
        the visibility timeout expires.
        """
        while True:
            resp = self.sqs_client.receive_message(
                QueueUrl=self.sqs_queue_url,
                # One message at a time - we could raise this and
                # parallelize, but there is no point until there are far
                # more files.
                MaxNumberOfMessages=1,
                VisibilityTimeout=self.VISIBILITY_TIMEOUT_SECONDS,
            )
            # The "Messages" key is absent (not an empty list) when the
            # queue had nothing to deliver.
            messages = resp.get("Messages", [])
            if not messages:
                sleep(self.EMPTY_POLL_SLEEP_SECONDS)
                continue

            for message in messages:
                try:
                    self._process_message(message)
                except Exception:
                    # Top-level worker boundary: one bad message must not
                    # kill the loop. It stays on the queue for a retry.
                    self.logger.exception(
                        "Failed to process SQS message %s",
                        message.get("MessageId"),
                    )
                else:
                    # Without an explicit delete the message would become
                    # visible again and be re-imported indefinitely.
                    self.sqs_client.delete_message(
                        QueueUrl=self.sqs_queue_url,
                        ReceiptHandle=message["ReceiptHandle"],
                    )

    def _process_message(self, message):
        """Download every S3 object referenced by one SQS message."""
        sqs_body = ujson.loads(message["Body"])
        # "Records" comes through as a list, but we expect one object.
        # Messages without it (e.g. the s3:TestEvent sent when bucket
        # notifications are configured) are treated as having no work.
        for record in sqs_body.get("Records", []):
            bucket_name, key = self._object_location(record)
            content = self._download(bucket_name, key)  # noqa: F841

            # TODO: we now have an in-memory copy of the s3 file content.
            # This is where we would run the import. We want a standardized
            # importer class; we would call something like below:
            # loader = Loader(content).load()

            self.logger.info("Imported s3://%s/%s", bucket_name, key)

    @staticmethod
    def _object_location(record):
        """Return ``(bucket_name, key)`` for one S3 event record.

        Object keys in S3 event notifications are URL-encoded (a space is
        sent as ``+``), so the key is decoded before it is used with S3.
        """
        from urllib.parse import unquote_plus

        s3_info = record["s3"]
        return s3_info["bucket"]["name"], unquote_plus(s3_info["object"]["key"])

    def _download(self, bucket_name, key):
        """Fetch ``s3://bucket_name/key`` and return its content as bytes."""
        with BytesIO() as fileobj:
            self.s3_client.download_fileobj(bucket_name, key, fileobj)
            return fileobj.getvalue()


class Loader:
    """Base class for load migrations over the raw bytes of an imported file.

    Subclasses override ``load`` to perform the actual import.
    """

    def __init__(self, content: bytes):
        """Store the raw file content to be loaded."""
        self.content = content

    def load(self):
        """Run the load migration.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # NotImplementedError is still an Exception subclass, so callers
        # that catch Exception keep working.
        raise NotImplementedError(
            "unimplemented; extend this class to write a load migration."
        )
diff --git a/backend/scraper/notebooks/cpdp.ipynb b/backend/scraper/notebooks/cpdp.ipynb index a4627d6a8..ab889c14b 100644 --- a/backend/scraper/notebooks/cpdp.ipynb +++ b/backend/scraper/notebooks/cpdp.ipynb @@ -29,7 +29,7 @@ "\n", "```bash\n", "# Stop services and remove volumes, rebuild images, start the database, create tables, run seeds, and follow logs\n", - "docker-compose down -v && docker-compose up --build -d db api && docker-compose logs -f\n", + "docker compose down -v && docker compose up --build -d db api && docker compose logs -f\n", "```\n", "\n", "Then open the notebook with either [VSCode](https://code.visualstudio.com/) or `jupyter notebook`.\n", diff --git a/backend/scraper/notebooks/mpv.ipynb b/backend/scraper/notebooks/mpv.ipynb index 4d4cb95e1..793121844 100644 --- a/backend/scraper/notebooks/mpv.ipynb +++ b/backend/scraper/notebooks/mpv.ipynb @@ -23,7 +23,7 @@ "\n", "```bash\n", "# Stop services and remove volumes, rebuild images, start the database, create tables, run seeds, and follow logs\n", - "docker-compose down -v && docker-compose up --build -d db api && docker-compose logs -f\n", + "docker compose down -v && docker compose up --build -d db api && docker compose logs -f\n", "```\n", "\n", "Then open the notebook with either [VSCode](https://code.visualstudio.com/) or `jupyter notebook`.\n", diff --git a/docker-compose.notebook.yml b/docker-compose.notebook.yml index 19c19b3d8..080e5ce25 100644 --- a/docker-compose.notebook.yml +++ b/docker-compose.notebook.yml @@ -1,4 +1,3 @@ -version: "3" services: api: command: bash -c '/wait && flask psql create && flask psql init && jupyter notebook --allow-root --ip=0.0.0.0 --port=8889' diff --git a/docker-compose.yml b/docker-compose.yml index 164e850a9..ee99ec841 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,6 @@ -version: "3" services: db: - image: postgres:16.2 #AWS RDS latest version + image: postgres:16 #AWS RDS latest version env_file: - ".env" volumes: diff --git 
a/requirements/Dockerfile b/requirements/Dockerfile index 0c2d03836..cdf6a4cae 100644 --- a/requirements/Dockerfile +++ b/requirements/Dockerfile @@ -2,7 +2,7 @@ # requirements, so this image starts with the same image as the database # containers and installs the same version of python as the api containers -FROM postgres:16.2 as base +FROM postgres:16 as base RUN apt-get update && apt-get install -y \ make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ @@ -15,9 +15,9 @@ SHELL ["bash", "-lc"] RUN curl https://pyenv.run | bash && \ echo 'export PATH="$HOME/.pyenv/shims:$HOME/.pyenv/bin:$PATH"' >> ~/.bashrc -ENV PYTHON_VERSION=3.12.3 +ENV PYTHON_VERSION=3.12.4 RUN pyenv install ${PYTHON_VERSION} && pyenv global ${PYTHON_VERSION} -RUN pip install pip-tools +RUN pip install -U pip-tools COPY . requirements/ diff --git a/requirements/README.md b/requirements/README.md index 35a553c92..515161fe0 100644 --- a/requirements/README.md +++ b/requirements/README.md @@ -20,7 +20,7 @@ python -m pip install -r requirements/dev_unix.txt ```bash cd requirements -docker-compose up --build --force-recreate +docker compose up --build --force-recreate ``` If you run the application natively, first install the pip-compile tool: diff --git a/requirements/_core.in b/requirements/_core.in index 3a078d029..cb7238b96 100644 --- a/requirements/_core.in +++ b/requirements/_core.in @@ -1,5 +1,6 @@ bcrypt==3.2.2 black +boto3 celery flake8 flask @@ -35,4 +36,5 @@ numpy spectree jupyter mixpanel -ua-parser \ No newline at end of file +ua-parser +ujson diff --git a/requirements/dev_unix.txt b/requirements/dev_unix.txt index df3a19237..5f2c4ec7b 100644 --- a/requirements/dev_unix.txt +++ b/requirements/dev_unix.txt @@ -44,6 +44,12 @@ bleach==6.1.0 # via nbconvert blinker==1.7.0 # via flask-mail +boto3==1.34.133 + # via -r requirements/_core.in +botocore==1.34.133 + # via + # boto3 + # s3transfer build==1.2.1 # via pip-tools celery==5.3.6 @@ -186,6 +192,10 @@ jinja2==3.1.3 # 
jupyterlab # jupyterlab-server # nbconvert +jmespath==1.0.1 + # via + # boto3 + # botocore json5==0.9.25 # via jupyterlab-server jsonpointer==2.4 @@ -405,6 +415,7 @@ pytest-postgresql==5.1.0 python-dateutil==2.9.0 # via # arrow + # botocore # celery # jupyter-client # pandas @@ -451,6 +462,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +s3transfer==0.10.2 + # via boto3 send2trash==1.8.2 # via jupyter-server six==1.16.0 @@ -528,10 +541,13 @@ tzdata==2024.1 # pandas ua-parser==0.18.0 # via -r requirements/_core.in +ujson==5.10.0 + # via -r requirements/_core.in uri-template==1.3.0 # via jsonschema urllib3==1.26.18 # via + # botocore # mixpanel # requests vine==5.1.0 diff --git a/requirements/dev_windows.txt b/requirements/dev_windows.txt index d24228c71..d394d5ee0 100644 --- a/requirements/dev_windows.txt +++ b/requirements/dev_windows.txt @@ -44,6 +44,12 @@ bleach==6.1.0 # via nbconvert blinker==1.7.0 # via flask-mail +boto3==1.34.133 + # via -r requirements/_core.in +botocore==1.34.133 + # via + # boto3 + # s3transfer build==1.2.1 # via pip-tools celery==5.3.6 @@ -186,6 +192,10 @@ jinja2==3.1.3 # jupyterlab # jupyterlab-server # nbconvert +jmespath==1.0.1 + # via + # boto3 + # botocore json5==0.9.25 # via jupyterlab-server jsonpointer==2.4 @@ -405,6 +415,7 @@ pytest-postgresql==5.1.0 python-dateutil==2.9.0 # via # arrow + # botocore # celery # jupyter-client # pandas @@ -451,6 +462,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +s3transfer==0.10.2 + # via boto3 send2trash==1.8.2 # via jupyter-server six==1.16.0 @@ -528,10 +541,13 @@ tzdata==2024.1 # pandas ua-parser==0.18.0 # via -r requirements/_core.in +ujson==5.10.0 + # via -r requirements/_core.in uri-template==1.3.0 # via jsonschema urllib3==1.26.18 # via + # botocore # mixpanel # requests vine==5.1.0 diff --git a/requirements/docker-compose.yml b/requirements/docker-compose.yml index 1771b1484..6010fad46 100644 --- a/requirements/docker-compose.yml +++ b/requirements/docker-compose.yml @@ -1,4 
+1,3 @@ -version: "3" services: pip-compile: build: diff --git a/requirements/docs.txt b/requirements/docs.txt index 34bd163af..cdaf7f87e 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -4,65 +4,89 @@ # # pip-compile requirements/docs.in # +babel==2.15.0 + # via mkdocs-material +certifi==2024.6.2 + # via requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via mkdocs -ghp-import==2.0.1 - # via mkdocs -importlib-metadata==4.5.0 +colorama==0.4.6 + # via mkdocs-material +ghp-import==2.1.0 # via mkdocs -jinja2==3.1.3 +idna==3.7 + # via requests +jinja2==3.1.4 # via # mkdocs # mkdocs-macros-plugin -markdown==3.3.4 + # mkdocs-material +markdown==3.6 # via # mkdocs # mkdocs-material # pymdown-extensions markupsafe==2.1.5 - # via jinja2 + # via + # jinja2 + # mkdocs mergedeep==1.3.4 - # via mkdocs -mkdocs==1.2.1 + # via + # mkdocs + # mkdocs-get-deps +mkdocs==1.6.0 # via # -r requirements/docs.in # mkdocs-macros-plugin # mkdocs-material -mkdocs-macros-plugin==0.5.5 +mkdocs-get-deps==0.2.0 + # via mkdocs +mkdocs-macros-plugin==1.0.5 # via -r requirements/docs.in -mkdocs-material==7.1.8 - # via - # -r requirements/docs.in - # mkdocs-macros-plugin - # mkdocs-material-extensions -mkdocs-material-extensions==1.0.1 +mkdocs-material==9.5.27 + # via -r requirements/docs.in +mkdocs-material-extensions==1.3.1 # via mkdocs-material -packaging==24.0 +packaging==24.1 # via mkdocs -pygments==2.17.2 +paginate==0.5.6 + # via mkdocs-material +pathspec==0.12.1 + # via mkdocs +platformdirs==4.2.2 + # via mkdocs-get-deps +pygments==2.18.0 # via # -r requirements/docs.in # mkdocs-material -pymdown-extensions==8.2 +pymdown-extensions==10.8.1 # via # -r requirements/docs.in # mkdocs-material -python-dateutil==2.9.0 +python-dateutil==2.9.0.post0 # via # ghp-import # mkdocs-macros-plugin pyyaml==6.0.1 # via # mkdocs + # mkdocs-get-deps # mkdocs-macros-plugin + # pymdown-extensions # pyyaml-env-tag pyyaml-env-tag==0.1 # via mkdocs +regex==2024.5.15 + # via 
mkdocs-material +requests==2.32.3 + # via mkdocs-material six==1.16.0 # via python-dateutil -termcolor==1.1.0 +termcolor==2.4.0 # via mkdocs-macros-plugin -watchdog==2.1.2 +urllib3==2.2.2 + # via requests +watchdog==4.0.1 # via mkdocs -zipp==3.17.0 - # via importlib-metadata diff --git a/requirements/prod.txt b/requirements/prod.txt index 15ecca6fc..2bb9f6c50 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -44,6 +44,12 @@ bleach==6.1.0 # via nbconvert blinker==1.7.0 # via flask-mail +boto3==1.34.133 + # via -r requirements/_core.in +botocore==1.34.133 + # via + # boto3 + # s3transfer build==1.2.1 # via pip-tools celery==5.3.6 @@ -186,6 +192,10 @@ jinja2==3.1.3 # jupyterlab # jupyterlab-server # nbconvert +jmespath==1.0.1 + # via + # boto3 + # botocore json5==0.9.25 # via jupyterlab-server jsonpointer==2.4 @@ -405,6 +415,7 @@ pytest-postgresql==5.1.0 python-dateutil==2.9.0 # via # arrow + # botocore # celery # jupyter-client # pandas @@ -451,6 +462,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +s3transfer==0.10.2 + # via boto3 send2trash==1.8.2 # via jupyter-server six==1.16.0 @@ -528,10 +541,13 @@ tzdata==2024.1 # pandas ua-parser==0.18.0 # via -r requirements/_core.in +ujson==5.10.0 + # via -r requirements/_core.in uri-template==1.3.0 # via jsonschema urllib3==1.26.18 # via + # botocore # mixpanel # requests vine==5.1.0 diff --git a/run_unix.sh b/run_unix.sh index e679cd685..bbe23905a 100755 --- a/run_unix.sh +++ b/run_unix.sh @@ -1,5 +1,5 @@ #!/bin/bash -# TODO: Postgres db is not set up in this script. It is set up in the init script for docker-compose. +# TODO: Postgres db is not set up in this script. It is set up in the init script for docker compose. function get_python_exec() { # Get a Python executable that can run this app. diff --git a/runtime.txt b/runtime.txt index 4ddc7cd66..74d315a82 100644 --- a/runtime.txt +++ b/runtime.txt @@ -1 +1 @@ -python-3.12.3 +python-3.12.4