Skip to content

Commit

Permalink
Upgrade from Python 3.6 to Python 3.12
Browse files Browse the repository at this point in the history
Use the https://github.com/ArchiveTeam/ludios_wpull fork, which is more
recently updated and supports Python 3.12.
  • Loading branch information
chosak committed Jul 5, 2024
1 parent 3b88880 commit 4aeac08
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 21 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ and export results as CSV or JSON reports.
Create a Python virtual environment and install required packages:

```
python3.6 -m venv venv
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements/base.txt
```
Expand Down Expand Up @@ -139,7 +139,7 @@ yarn build
Create a Python virtual environment and install required packages:

```
python3.6 -m venv venv
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements/base.txt
```
Expand Down Expand Up @@ -269,9 +269,8 @@ fab configure -H crawler

The `configure` command:

- Installs Node, Yarn, and Git
- Installs a modern version of SQLite
- Installs Python 3
- Installs Node and Git
- Installs Python 3.12

### Deploying the application

Expand Down
6 changes: 2 additions & 4 deletions crawler/wpull_plugin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import logging
import re
from urllib import parse
Expand Down Expand Up @@ -39,9 +38,8 @@
def patch_wpull_connection():
"""Use wait_timeout instead of close_timeout for readline."""

@asyncio.coroutine
def readline(self):
data = yield from self.run_network_operation(
async def readline(self):
data = await self.run_network_operation(
self.reader.readline(), wait_timeout=self._timeout, name="Readline"
)
return data
Expand Down
32 changes: 28 additions & 4 deletions fabfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

NODE_VERSION = "20"

SQLITE_VERSION = "3390200"
SQLITE_BASENAME = f"sqlite-autoconf-{SQLITE_VERSION}"
SQLITE_INSTALL_ROOT = f"{DEPLOY_ROOT}/{SQLITE_BASENAME}"
PYTHON_VERSION = "3.12.4"
PYTHON_BASENAME = f"Python-{PYTHON_VERSION}"
PYTHON_INSTALL_ROOT = f"{DEPLOY_ROOT}/{PYTHON_BASENAME}"

SOURCE_PARENT = f"{DEPLOY_ROOT}/cfpb"
SOURCE_REPO = "https://github.com/cfpb/website-indexer.git"
Expand Down Expand Up @@ -66,6 +66,30 @@ def configure(conn):
conn.sudo(f"mkdir -p {DEPLOY_ROOT}")
conn.sudo(f"chown -R {conn.user}:{conn.user} {DEPLOY_ROOT}")

# Build Python 3.12 to a local directory.
# This doesn't update /usr/bin/python (used by sudo) or /usr/local/bin/python.
conn.sudo("yum install -y openssl-devel bzip2-devel libffi-devel")

with conn.cd(DEPLOY_ROOT):
conn.run(
f"curl -O https://www.python.org/ftp/python/{PYTHON_VERSION}/{PYTHON_BASENAME}.tgz"
)
conn.run(f"tar xzf {PYTHON_BASENAME}.tgz")
conn.run(f"rm {PYTHON_BASENAME}.tgz")

with conn.cd(PYTHON_INSTALL_ROOT):
conn.run("LD_RUN_PATH=/usr/local/lib ./configure --enable-optimizations")

# https://github.com/pyinvoke/invoke/issues/459
conn.sudo(
f"bash -c "
f'"cd {PYTHON_INSTALL_ROOT} && LD_RUN_PATH=/usr/local/lib make"'
)

conn.sudo(
f"chmod a+x {PYTHON_INSTALL_ROOT}/python"
)


@task
def deploy(conn):
Expand All @@ -85,7 +109,7 @@ def deploy(conn):
with conn.cd(SOURCE_ROOT):
conn.sudo("corepack enable")
conn.run("yarn && yarn build")
conn.run("python3 -m venv venv")
conn.run(f"{PYTHON_INSTALL_ROOT}/python -m venv venv")

with conn.prefix("source venv/bin/activate"):
conn.run("pip install -r requirements/base.txt")
Expand Down
11 changes: 3 additions & 8 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,8 @@ django-filter==21.1
django-modelcluster==5.3
djangorestframework==3.13.1
djangorestframework-csv==2.1.1
lxml==4.9.1
whitenoise==5.3.0
wpull==2.0.1

# wpull doesn't set upper bounds for some of its requirements,
# so we need to specify these manually:
# See https://github.com/ArchiveTeam/wpull/blob/v2.0.1/requirements.txt
html5lib==0.9999999
sqlalchemy==1.0.12
tornado==4.5.3
# Ensure libxml2 is loaded dynamically; see
# https://html5-parser.readthedocs.io/en/latest/#unix
wpull@https://github.com/ArchiveTeam/ludios_wpull/archive/refs/tags/5.0.3.tar.gz --no-binary=lxml

0 comments on commit 4aeac08

Please sign in to comment.