openzim · benoit74 · Aug 29, 2023 · Jul 31, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/Dockerfile b/Dockerfile
@@ -10,24 +10,15 @@ FROM python:3.11.4-bookworm
 LABEL org.opencontainers.image.source https://github.com/openzim/freecodecamp
 
 RUN python -m pip install --no-cache-dir -U \
-      pip \
-      pip-tools
-
-# Copy pyproject.toml and its dependencies and install Python dependencies
-# This is separated to benefit from Docker build cache when only
-# zimui or Python source code is modified (which is quite often the case)
-COPY scraper/src/fcc2zim/__about__.py /src/scraper/src/fcc2zim/__about__.py
-COPY scraper/pyproject.toml scraper/pypi-readme.rst /src/scraper/
-RUN pip-compile --strip-extras -o requirements.txt /src/scraper/pyproject.toml \
- && pip install --no-cache-dir -r requirements.txt \
- && rm requirements.txt
-
-# Copy zimui build output
-COPY --from=zimui /src/dist /src/zimui
+      pip
 
-# Copy scraper and install it
+# Copy code + associated artifacts + zimui build output
+COPY LICENSE LICENSE.fcc.md README.md /src/
+COPY scraper/pyproject.toml scraper/tasks.py /src/scraper/
 COPY scraper/src /src/scraper/src
-COPY scraper/*.md scraper/*.rst LICENSE LICENSE.fcc.md scraper/*.py /src/scraper/
+COPY --from=zimui /src/dist /src/zimui
+
+# Install + cleanup
 RUN pip install --no-cache-dir /src/scraper \
  && rm -rf /src/scraper
 

diff --git a/Makefile b/Makefile
diff --git a/README.md b/README.md
@@ -9,27 +9,21 @@ This scraper downloads selected [freeCodeCamp](https://www.freecodecamp.org/) co
 [![PyPI version shields.io](https://img.shields.io/pypi/v/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
 [![Docker](https://ghcr-badge.deta.dev/openzim/freecodecamp/latest_tag?label=docker)](https://ghcr.io/openzim/freecodecamp)
-
-
 ## Architecture
 This project consists of two major components:
 
-- `scraper` - The Python tool that build FCC ZIM. It is responsible to:
-    - fetch FCC curriculum and package it into a proper format
-    - embed client can read, as well as our zim builder
 - `zimui` - A Vue.JS application specially crafted to:
     - be embeded inside the ZIM and serve as main entry point (through compilation for offline usage with Vite)
     - present FCC curriculum, including solving exercices
     - be compatible with most ZIM readers
+- `scraper` - The Python tool that builds the FCC ZIM. It is responsible to:
+    - fetch FCC curriculum and package it into a proper format
+    - embed client can read, as well as our zim builder
 
-## Scraper operation
+## Dependencies
 
-The scraper assumes that `zimui` is already built and ready to be embedded into the ZIM in the `<zimui_dir>`.
+Aside from Node.JS and Python dependencies, which are managed, other binary dependencies come from Python [zimscraperlib](https://github.com/openzim/python-scraperlib/)
 
-The scraper operation is divided into three phases:
-1. `fetch`: retrieve curriculum source code from Github and extracts it into `<tmp_dir>/curriculum` folder
-2. `prebuild`: transform curriculum data into files ready to be consumed by the Vite UI
-3. `build`: create the final ZIM with `zimui` and curriculum data
 
 ## Development
 
@@ -40,23 +34,27 @@ See [CONTRIBUTING.md](CONTRIBUTING.md).
 - Node 20.x
 - Python 3.11
 
-See: [`Makefile`](Makefile) for a full build process
+### Running scraper locally
 
-### Running scraper with Docker
-
-Run from official version (published on GHCR.io) ; ZIM will be available in the `output` sub-folder of current working directory.
+You have to:
+- build the `zimui` frontend which will be embedded inside the ZIM (and redo it every time you make modifications to the `zimui`)
+- run the `scraper` to retrieve FCC curriculum and build the ZIM
 
+Sample commands:
 ```
-docker run --rm -it -v $(pwd)/output:/output ghcr.io/openzim/freecodecamp:latest --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
+cd zimui
+yarn install
+yarn build
+cd ../scraper
+hatch run fcc2zim --zimui-dist-dir ../zimui/dist --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
 ```
 
-You might add `-v $(pwd)/tmp:/tmp` parameter to also mount temporary directory and keep temporary artificats.
+### Running scraper with Docker
 
-You might use the `dev` Docker image tag instead of `latest` to use current development version (based on Github `main` branch).
+Run from the official version (published on GHCR.io); the ZIM will be available in the `output` sub-folder of the current working directory.
 
-You might build your own local Docker image and then use it with `openzim/freecodecamp` instead of `ghcr.io/openzim/freecodecamp:latest`:
 ```
-docker build -t openzim/freecodecamp .
+docker run --rm -it -v $(pwd)/output:/output ghcr.io/openzim/freecodecamp:latest --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
 ```
 
 ## Course Options and Limitations
@@ -66,7 +64,3 @@ Currently this scraper only supports Javascript challenges. A list of courses is
 You can find a list of course slugs in the [freeCodeCamp curriculum folder](https://github.com/freeCodeCamp/freeCodeCamp/tree/main/curriculum/challenges/english/02-javascript-algorithms-and-data-structures)
 
 In docker example above, see the `--course` argument : `regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects`
-
-## License
-
-This repository is licensed under GPLv3, with the exception of the freeCodeCamp curriculum which is licensed under BSD 3 Clause (see LICENSE.fcc.md).
diff --git a/scraper/pypi-readme.rst b/scraper/pypi-readme.rst
diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml
@@ -7,10 +7,10 @@ name = "fcc2zim"
 authors = [
   { name = "Kiwix", email = "[email protected]" },
 ]
-keywords = ["fcc freecodecamp zim kiwix openzim offline"]
+keywords = ["fcc","freecodecamp","zim","kiwix","openzim","offline"]
 requires-python = ">=3.11"
-description = "Make ZIM files from Freecodedcamp courses"
-readme = "pypi-readme.rst"
+description = "Make ZIM files from freeCodeCamp courses"
+readme = "../README.md"
 license = {text = "GPL-3.0-or-later"}
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -159,6 +159,8 @@ select = [
 ignore = [
   # Allow non-abstract empty methods in abstract base classes
   "B027",
+  # Allow use of date.today
+  "DTZ011",
   # Remove flake8-errmsg since we consider they bloat the code and provide limited value
   "EM",
   # Allow boolean positional values in function calls, like `dict.get(... True)`