Skip to content

Commit

Permalink
Upgrade from Python 3.6 to Python 3.12
Browse files (browse the repository at this point in the history)
Use the ludios_wpull fork (https://github.com/ArchiveTeam/ludios_wpull), which
is more recently updated and supports Python 3.12.
  • Loading branch information
chosak committed Jul 5, 2024
1 parent 3b88880 commit c962020
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 22 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ and export results as CSV or JSON reports.
Create a Python virtual environment and install required packages:

```
python3.6 -m venv venv
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements/base.txt
```
Expand Down Expand Up @@ -139,7 +139,7 @@ yarn build
Create a Python virtual environment and install required packages:

```
python3.6 -m venv venv
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements/base.txt
```
Expand Down Expand Up @@ -269,9 +269,8 @@ fab configure -H crawler

The `configure` command:

- Installs Node, Yarn, and Git
- Installs a modern version of SQLite
- Installs Python 3
- Installs Node and Git
- Installs Python 3.12

### Deploying the application

Expand Down
6 changes: 2 additions & 4 deletions crawler/wpull_plugin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import logging
import re
from urllib import parse
Expand Down Expand Up @@ -39,9 +38,8 @@
def patch_wpull_connection():
"""Use wait_timeout instead of close_timeout for readline."""

@asyncio.coroutine
def readline(self):
data = yield from self.run_network_operation(
async def readline(self):
data = await self.run_network_operation(
self.reader.readline(), wait_timeout=self._timeout, name="Readline"
)
return data
Expand Down
10 changes: 5 additions & 5 deletions fabfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@

NODE_VERSION = "20"

SQLITE_VERSION = "3390200"
SQLITE_BASENAME = f"sqlite-autoconf-{SQLITE_VERSION}"
SQLITE_INSTALL_ROOT = f"{DEPLOY_ROOT}/{SQLITE_BASENAME}"

SOURCE_PARENT = f"{DEPLOY_ROOT}/cfpb"
SOURCE_REPO = "https://github.com/cfpb/website-indexer.git"
SOURCE_DIRNAME = "website-indexer"
Expand Down Expand Up @@ -62,11 +58,15 @@ def configure(conn):
# Install git to be able to clone source code repository.
conn.sudo("yum install -y git")

# Install Python 3.12.
conn.sudo("yum install -y python3.12")

# Set up deploy root and grant permissions to deploy user.
conn.sudo(f"mkdir -p {DEPLOY_ROOT}")
conn.sudo(f"chown -R {conn.user}:{conn.user} {DEPLOY_ROOT}")



@task
def deploy(conn):
print("Cloning and configuring application source code")
Expand All @@ -85,7 +85,7 @@ def deploy(conn):
with conn.cd(SOURCE_ROOT):
conn.sudo("corepack enable")
conn.run("yarn && yarn build")
conn.run("python3 -m venv venv")
conn.run(f"python3.12 -m venv venv")

with conn.prefix("source venv/bin/activate"):
conn.run("pip install -r requirements/base.txt")
Expand Down
11 changes: 3 additions & 8 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,8 @@ django-filter==21.1
django-modelcluster==5.3
djangorestframework==3.13.1
djangorestframework-csv==2.1.1
lxml==4.9.1
whitenoise==5.3.0
wpull==2.0.1

# wpull doesn't set upper bounds for some of its requirements,
# so we need to specify these manually:
# See https://github.com/ArchiveTeam/wpull/blob/v2.0.1/requirements.txt
html5lib==0.9999999
sqlalchemy==1.0.12
tornado==4.5.3
# Build lxml from source (--no-binary=lxml) so that libxml2 is loaded
# dynamically; see the html5-parser installation notes:
# https://html5-parser.readthedocs.io/en/latest/#unix
wpull@https://github.com/ArchiveTeam/ludios_wpull/archive/refs/tags/5.0.3.tar.gz --no-binary=lxml

0 comments on commit c962020

Please sign in to comment.