pvsite-datamodel integration & fake forecasts #1

Merged · 18 commits · Jan 25, 2024
16 changes: 8 additions & 8 deletions .github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: CI Pipeline for SDK - Python
+name: CI pipeline for India Forecast App

on: push

@@ -8,16 +8,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out the repo
-uses: actions/checkout@v2
+uses: actions/checkout@v4
- name: Install poetry
run: pipx install poetry==1.7.1

- name: Install python
-uses: actions/setup-python@v4
+uses: actions/setup-python@v5
with:
-python-version: '3.9'
-cache: poetry
+python-version: '3.11'
+cache: 'poetry'

- name: Install python dependencies
run: poetry install
@@ -34,7 +34,7 @@ jobs:
release:
needs: [lint_and_test]
if: github.ref_name == 'main'
-uses: openclimatefix/.github/.github/workflows/docker-release.yml@v1.2.0
+uses: openclimatefix/.github/.github/workflows/docker-release.yml@v1.8.1
secrets:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
7 changes: 1 addition & 6 deletions Makefile
@@ -1,20 +1,15 @@
#
# This mostly contains shortcuts for multi-command steps.
#
-SRC = india_forecast_app tests
+SRC = india_forecast_app scripts tests

.PHONY: lint
lint:
poetry run ruff $(SRC)
-poetry run black --check $(SRC)
-poetry run isort --check $(SRC)
Comment from @peterdudfield (Contributor, Jan 25, 2024):
do you want to keep these in? I have found them quite useful in the past, and then they are the same as the CI.

Reply from the author (Contributor):
I was getting a conflict between ruff and black, so I just stuck with ruff; as far as I understand, ruff is a drop-in replacement for black anyway.



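For reference, ruff can also absorb isort's role through its lint rules, which is one way to avoid the tool conflict mentioned above. A minimal `pyproject.toml` sketch (the rule selection and line length here are illustrative, not the repo's actual config):

```toml
[tool.ruff]
line-length = 100

[tool.ruff.lint]
# E/F are the pycodestyle & pyflakes defaults; "I" adds isort-style import sorting
select = ["E", "F", "I"]
```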
.PHONY: format
format:
poetry run ruff --fix $(SRC)
-poetry run black $(SRC)
-poetry run isort $(SRC)

.PHONY: test
test:
29 changes: 29 additions & 0 deletions README.md
@@ -26,6 +26,35 @@ make format
make test
```

## Running the app locally
Replace `{DB_URL}` with a Postgres DB connection string (see below for setting up an ephemeral local DB).

If testing on a local DB, you may use the following script to seed the DB with a dummy user, site and site_group.
```
DB_URL={DB_URL} poetry run seeder
```
⚠️ Note this is a destructive script and will drop all tables before recreating them to ensure a clean slate. DO NOT RUN IN PRODUCTION ENVIRONMENTS

This example invokes app.py and passes the help flag
```
DB_URL={DB_URL} poetry run app --help
```

### Starting a local database using docker

```bash
docker run \
-it --rm \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=postgres \
-p 54545:5432 postgres:14-alpine \
postgres
```

The corresponding `DB_URL` will be

`postgresql://postgres:postgres@localhost:54545/postgres`
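The host, port and credentials in this URL mirror the docker flags above; a quick stdlib sanity check of how the pieces map (illustrative only, not part of the repo):

```python
from urllib.parse import urlsplit

# Parse the connection string used above; urlsplit handles any scheme://
url = urlsplit("postgresql://postgres:postgres@localhost:54545/postgres")

print(url.hostname, url.port, url.username)  # → localhost 54545 postgres
```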

## Building and running in [Docker](https://www.docker.com/)

Build the Docker image
2 changes: 1 addition & 1 deletion india_forecast_app/__init__.py
@@ -1,2 +1,2 @@
"""India Forecast App"""
__version__ = "0.1.0"
184 changes: 179 additions & 5 deletions india_forecast_app/app.py
@@ -1,10 +1,184 @@
"""
Main forecast app entrypoint
"""

import datetime as dt
import logging
import os
import sys

import click
import pandas as pd
from pvsite_datamodel import DatabaseConnection
from pvsite_datamodel.read import get_sites_by_country
from pvsite_datamodel.write import insert_forecast_values
from sqlalchemy.orm import Session

from .model import DummyModel

log = logging.getLogger(__name__)


def get_site_ids(db_session: Session) -> list[str]:
"""
Gets all available site_ids in India

Args:
db_session: A SQLAlchemy session

Returns:
A list of site_ids
"""

sites = get_sites_by_country(db_session, country="india")

Comment from a Contributor:
could we add a logging statement here, saying found X sites
return [s.site_uuid for s in sites]


def get_model():
"""
Instantiates and returns the forecast model ready for running inference

Returns:
A forecasting model
"""

model = DummyModel()
return model


def run_model(model, site_id: str, timestamp: dt.datetime):
"""
Runs inference on model for the given site & timestamp

Args:
model: A forecasting model
site_id: A specific site ID
timestamp: timestamp to run a forecast for

Returns:
A forecast or None if model inference fails
"""

try:
forecast = model.predict(site_id=site_id, timestamp=timestamp)
except Exception:
log.error(
f"Error while running model.predict for site_id={site_id}. Skipping",
exc_info=True,
)
return None

return forecast


def save_forecast(db_session: Session, forecast, write_to_db: bool):
"""
Saves a forecast for a given site & timestamp

Args:
db_session: A SQLAlchemy session
forecast: a forecast dict containing forecast meta and predicted values
write_to_db: If true, forecast values are written to db, otherwise to stdout

Raises:
IOError: An error if database save fails
"""

forecast_meta = {
"site_uuid": forecast["meta"]["site_id"],
"timestamp_utc": forecast["meta"]["timestamp"],
"forecast_version": forecast["meta"]["version"],
}
forecast_values_df = pd.DataFrame(forecast["values"])
forecast_values_df["horizon_minutes"] = (
(forecast_values_df["start_utc"] - forecast_meta["timestamp_utc"])
/ pd.Timedelta("60s")
).astype("int")

if write_to_db:
insert_forecast_values(db_session, forecast_meta, forecast_values_df)
else:
log.info(
f'site_id={forecast_meta["site_uuid"]}, \
timestamp={forecast_meta["timestamp_utc"]}, \
version={forecast_meta["forecast_version"]}, \
forecast values={forecast_values_df.to_string()}'
)
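The `horizon_minutes` arithmetic in `save_forecast` can be exercised in isolation; a sketch with made-up timestamps (pandas only, no DB):

```python
import pandas as pd

forecast_time = pd.Timestamp("2024-01-25 06:00")
df = pd.DataFrame({"start_utc": pd.date_range(forecast_time, periods=3, freq="15min")})

# Same expression as save_forecast: whole minutes between each start and the forecast time
df["horizon_minutes"] = ((df["start_utc"] - forecast_time) / pd.Timedelta("60s")).astype("int")
print(df["horizon_minutes"].tolist())  # → [0, 15, 30]
```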


@click.command()
-@click.option("--site", help="Site ID")
-def app(site):
-    """Runs the forecast for a given site"""
-    print(f"Running forecast for site: {site}")
@click.option(
"--date",
"-d",
"timestamp",
type=click.DateTime(formats=["%Y-%m-%d-%H-%M"]),
default=None,
help='Date-time (UTC) at which we make the prediction. \
Format should be YYYY-MM-DD-HH-mm. Defaults to "now".',
)
@click.option(
"--write-to-db",
is_flag=True,
default=False,
help="Set this flag to actually write the results to the database.",
)
@click.option(
"--log-level",
default="info",
help="Set the python logging log level",
show_default=True,
)
def app(timestamp: dt.datetime | None, write_to_db: bool, log_level: str):
"""
Main function for running forecasts for sites in India
"""
logging.basicConfig(stream=sys.stdout, level=getattr(logging, log_level.upper()))

if timestamp is None:
timestamp = dt.datetime.now(tz=dt.UTC)
log.info('Timestamp omitted - will generate forecasts for "now"')
else:
# Ensure timestamp is UTC (datetime.replace returns a new object, so reassign)
timestamp = timestamp.replace(tzinfo=dt.UTC)

# 0. Initialise DB connection
url = os.environ["DB_URL"]

db_conn = DatabaseConnection(url, echo=False)

with db_conn.get_session() as session:

# 1. Get sites
log.info("Getting sites")
site_ids = get_site_ids(session)

# 2. Load model
log.info("Loading model")
model = get_model()

# 3. Run model for each site
for site_id in site_ids:
log.info(f"Running model for site={site_id}")
forecast_values = run_model(model=model, site_id=site_id, timestamp=timestamp)

if forecast_values is not None:
Comment from a Contributor:
can you add an else, and say forecast values were None, or something like that

# 4. Write forecast to DB or stdout
log.info(f"Writing forecast for site_id={site_id}")
forecast = {
"meta": {
"site_id": site_id,
"version": model.version,
"timestamp": timestamp,
},
"values": forecast_values,
}
save_forecast(
session,
forecast=forecast,
write_to_db=write_to_db,
)


if __name__ == "__main__":
app()
104 changes: 104 additions & 0 deletions india_forecast_app/model.py
@@ -0,0 +1,104 @@
"""
Model classes (currently just allows for loading a dummy model)
"""

import datetime as dt
import math
import random


class DummyModel:
"""
Dummy model that emulates the capabilities expected by a real model
"""

@property
def version(self):
"""Version number"""
return "0.0.0"

def __init__(self):
"""Initializer for the model"""
pass

def predict(self, site_id: str, timestamp: dt.datetime):
"""Make a prediction for the model"""
return self._generate_dummy_forecast(timestamp)

def _generate_dummy_forecast(self, timestamp: dt.datetime):
"""Generates a fake 2-day forecast (15-minute intervals)"""
start = timestamp
end = timestamp + dt.timedelta(days=2)
step = dt.timedelta(minutes=15)
numSteps = int((end - start) / step)
values: list[dict] = []

for i in range(numSteps):
time = start + i * step
_yield = _basicSolarYieldFunc(int(time.timestamp()))
values.append(
{
"start_utc": time,
"end_utc": time + step,
"forecast_power_kw": int(_yield),
}
)

return values
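The interval arithmetic above can be checked standalone; a two-day window at 15-minute steps yields 192 slots:

```python
import datetime as dt

start = dt.datetime(2024, 1, 25, tzinfo=dt.timezone.utc)
end = start + dt.timedelta(days=2)
step = dt.timedelta(minutes=15)

# Same computation as _generate_dummy_forecast: whole steps in the window
num_steps = int((end - start) / step)
print(num_steps)  # → 192
```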


def _basicSolarYieldFunc(timeUnix: int, scaleFactor: int = 10000) -> float:
"""Gets a fake solar yield for the input time.

The basic yield function is built from a sine wave
with a period of 24 hours, peaking at 12 hours.
Further convolutions modify the value according to time of year.

Args:
timeUnix: The time in unix time.
scaleFactor: The scale factor for the sine wave.
A scale factor of 10000 will result in a peak yield of 10 kW.
"""
# Create a datetime object from the unix time
time = dt.datetime.fromtimestamp(timeUnix, tz=dt.UTC)
# The function's x-values are hours, so convert the time to hours
hour = time.day * 24 + time.hour + time.minute / 60 + time.second / 3600

# scaleX makes the period of the function 24 hours
scaleX = math.pi / 12
# translateX moves the minimum of the function to 0 hours
translateX = -math.pi / 2
# translateY modulates the base function based on the month.
# * + 0.5 at the summer solstice
# * - 0.5 at the winter solstice
translateY = math.sin((math.pi / 6) * time.month + translateX) / 2.0

# basefunc ranges between -1 and 1 with a period of 24 hours,
# peaking at 12 hours.
# translateY changes the min and max to range between 1.5 and -1.5
# depending on the month.
basefunc = math.sin(scaleX * hour + translateX) + translateY
# Remove negative values
basefunc = max(0, basefunc)
# Steepen the curve. The divisor is based on the max value
basefunc = basefunc**4 / 1.5**4

# Instead of completely random noise, apply based on the following process:
# * A base noise function which is the product of long and short sines
# * The resultant function modulates with very small amplitude around 1
noise = (math.sin(math.pi * time.hour) / 20) * (
math.sin(math.pi * time.hour / 3)
) + 1
noise = noise * random.random() / 20 + 0.97

# Create the output value from the base function, noise, and scale factor
output = basefunc * noise * scaleFactor

return output
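The phase choices above can be sanity-checked with just the base sine (a standalone re-derivation, not the repo's code): scaleX = pi/12 gives a 24-hour period, and translateX = -pi/2 puts the minimum at hour 0 and the peak at hour 12.

```python
import math

def base(hour: float) -> float:
    # sin with a 24-hour period, shifted so the minimum sits at hour 0
    return math.sin((math.pi / 12) * hour - math.pi / 2)

print(base(12))  # peak at noon → 1.0
print(base(0))   # trough at midnight → -1.0
```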


def _basicWindYieldFunc(timeUnix: int, scaleFactor: int = 10000) -> float:
"""Gets a fake wind yield for the input time."""
output = min(scaleFactor, scaleFactor * 10 * random.random())

return output