From 845719346eb226e33effc1861347c88f9c342e4c Mon Sep 17 00:00:00 2001 From: Andy Chosak Date: Thu, 2 Nov 2023 11:14:52 -0400 Subject: [PATCH] Remove or rename existing "crawsqueal" references With the renaming of this repository from "crawsqueal" to "website-indexer", there are a few "crawsqueal" references that need to be removed or renamed. --- fabfile.py | 24 ++++++++++--------- package.json | 8 +++---- .../static_src/{crawsqueal.css => main.css} | 0 viewer/static_src/{crawsqueal.js => main.js} | 0 viewer/templates/viewer/base.html | 4 ++-- viewer/templates/viewer/page_list.html | 2 +- 6 files changed, 20 insertions(+), 18 deletions(-) rename viewer/static_src/{crawsqueal.css => main.css} (100%) rename viewer/static_src/{crawsqueal.js => main.js} (100%) diff --git a/fabfile.py b/fabfile.py index 0d97059..2a40444 100644 --- a/fabfile.py +++ b/fabfile.py @@ -27,26 +27,28 @@ PYTHON_INSTALL_ROOT = f"{DEPLOY_ROOT}/{PYTHON_BASENAME}" SOURCE_PARENT = f"{DEPLOY_ROOT}/cfpb" -SOURCE_REPO = "https://github.com/cfpb/crawsqueal.git" -SOURCE_DIRNAME = "crawsqueal" +SOURCE_REPO = "https://github.com/cfpb/website-indexer.git" +SOURCE_DIRNAME = "website-indexer" SOURCE_ROOT = f"{SOURCE_PARENT}/{SOURCE_DIRNAME}" -CRAWL_DATABASE = "/var/tmp/crawl.sqlite3" +CRAWL_DIR = "/var/tmp" +CRAWL_DATABASE = f"{CRAWL_DIR}/crawl.sqlite3" +CRAWL_DATABASE_TMP = f"{CRAWL_DIR}/crawl-new.sqlite3" LOGROTATE_DIR = "/etc/logrotate.d" -LOGROTATE_NAME = "crawsqueal" +LOGROTATE_NAME = "website-indexer" LOGROTATE_PATH = f"{LOGROTATE_DIR}/{LOGROTATE_NAME}" SYSTEMD_DIR = "/etc/systemd/system" -SYSTEMD_SERVICE = "crawsqueal" +SYSTEMD_SERVICE = "website-indexer" SYSTEMD_NAME = f"{SYSTEMD_SERVICE}.service" SYSTEMD_PATH = f"{SYSTEMD_DIR}/{SYSTEMD_NAME}" -CRONTAB_NAME = "crawsqueal" +CRONTAB_NAME = "website-indexer" CRONTAB_DIR = "/etc/cron.d" CRONTAB_PATH = f"{CRONTAB_DIR}/{CRONTAB_NAME}" -LOG_DIR = "/var/log/crawsqueal" +LOG_DIR = "/var/log/website-indexer" @task @@ -144,10 +146,10 @@ def deploy(conn): "SHELL=/bin/bash\n" f"0 0 * * * {conn.user} " f"cd {SOURCE_ROOT} && " - f"./wget_crawl.sh https://www.consumerfinance.gov/ && " - f"PYTHONPATH=. DJANGO_SETTINGS_MODULE=settings ./venv/bin/django-admin " - "warc_to_db --recreate ./crawl.warc.gz ./crawl.sqlite3 && " - f"mv crawl.{{cdx,sqlite3,warc.gz}} wget.log /var/tmp/\n" + f"./venv/bin/python manage.py crawl --recreate " + f"https://www.consumerfinance.gov {CRAWL_DATABASE_TMP} " + f"> {CRAWL_DIR}/crawl.log 2>&1 && " + f"mv {CRAWL_DATABASE_TMP} {CRAWL_DATABASE}\n" "EOF'" ) diff --git a/package.json b/package.json index 951d34c..755f4eb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { - "name": "crawsqueal", - "version": "1.0.0", + "name": "website-indexer", + "version": "1.1.0", "license": "CC0-1.0", "type": "module", "engines": { @@ -10,8 +10,8 @@ "prettier": "prettier --check 'viewer/static_src/*.{css,js}' 'viewer/**/*.html'", "fix": "npm run prettier -- --write", "fonts": "cp -r viewer/static_src/fonts viewer/static/fonts", - "styles": "curl -o viewer/static/main.css https://www.consumerfinance.gov/static/css/main.css && curl https://www.consumerfinance.gov/static/apps/regulations3k/css/main.css >> viewer/static/main.css && cp viewer/static_src/crawsqueal.css viewer/static/crawsqueal.css", - "scripts": "esbuild viewer/static_src/crawsqueal.js --bundle --outfile=viewer/static/crawsqueal.js", + "styles": "curl -o viewer/static/cfgov.css https://www.consumerfinance.gov/static/css/main.css && curl https://www.consumerfinance.gov/static/apps/regulations3k/css/main.css >> viewer/static/cfgov.css && cp viewer/static_src/main.css viewer/static/main.css", + "scripts": "esbuild viewer/static_src/main.js --bundle --outfile=viewer/static/main.js", "build": "yarn fonts && yarn styles && yarn scripts" }, "dependencies": { diff --git a/viewer/static_src/crawsqueal.css b/viewer/static_src/main.css similarity index 100% rename from viewer/static_src/crawsqueal.css rename to viewer/static_src/main.css diff --git a/viewer/static_src/crawsqueal.js b/viewer/static_src/main.js similarity index 100% rename from viewer/static_src/crawsqueal.js rename to viewer/static_src/main.js diff --git a/viewer/templates/viewer/base.html b/viewer/templates/viewer/base.html index 55b7c73..3f33cea 100644 --- a/viewer/templates/viewer/base.html +++ b/viewer/templates/viewer/base.html @@ -12,8 +12,8 @@ {% block title %}Consumerfinance.gov web page index{% endblock %} + -
@@ -39,6 +39,6 @@
- + diff --git a/viewer/templates/viewer/page_list.html b/viewer/templates/viewer/page_list.html index c869b31..7e03ad5 100644 --- a/viewer/templates/viewer/page_list.html +++ b/viewer/templates/viewer/page_list.html @@ -184,7 +184,7 @@

({{ crawl_stats.database_size | filesizeformat }}) to + href="https://github.com/cfpb/website-indexer#searching-the-crawl-database"> query the data locally