diff --git a/poetry.lock b/poetry.lock index 29ff0ad..9b1280c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,24 +1,5 @@ # This file is automatically @generated by Poetry and should not be changed by hand. -[[package]] -name = "alembic" -version = "1.9.4" -description = "A database migration tool for SQLAlchemy." -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "alembic-1.9.4-py3-none-any.whl", hash = "sha256:6f1c2207369bf4f49f952057a33bb017fbe5c148c2a773b46906b806ea6e825f"}, - {file = "alembic-1.9.4.tar.gz", hash = "sha256:4d3bd32ecdbb7bbfb48a9fe9e6d6fd6a831a1b59d03e26e292210237373e7db5"}, -] - -[package.dependencies] -Mako = "*" -SQLAlchemy = ">=1.3.0" - -[package.extras] -tz = ["python-dateutil"] - [[package]] name = "anyio" version = "3.6.2" @@ -968,21 +949,6 @@ files = [ {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, ] -[[package]] -name = "freezegun" -version = "1.2.2" -description = "Let your Python tests travel through time" -category = "main" -optional = false -python-versions = ">=3.6" -files = [ - {file = "freezegun-1.2.2-py3-none-any.whl", hash = "sha256:ea1b963b993cb9ea195adbd893a48d573fda951b0da64f60883d7e988b606c9f"}, - {file = "freezegun-1.2.2.tar.gz", hash = "sha256:cd22d1ba06941384410cd967d8a99d5ae2442f57dfafeff2fda5de8dc5c05446"}, -] - -[package.dependencies] -python-dateutil = ">=2.7" - [[package]] name = "geopandas" version = "0.12.2" @@ -1015,6 +981,7 @@ files = [ {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"}, {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, @@ -1023,6 +990,7 @@ files = [ {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -1052,6 +1020,7 @@ files = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -1060,6 +1029,7 @@ files = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ -1591,26 +1561,6 @@ files = [ {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, ] -[[package]] -name = "mako" -version = "1.2.4" -description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818"}, - {file = "Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"}, -] - -[package.dependencies] -MarkupSafe = ">=0.9.2" - -[package.extras] -babel = ["Babel"] -lingua = ["lingua"] -testing = ["pytest"] - [[package]] name = "markupsafe" version = "2.1.2" @@ -2037,28 +1987,6 @@ jupyter-server = ">=1.8,<3" [package.extras] test = ["pytest", "pytest-console-scripts", "pytest-tornasync"] -[[package]] -name = "nowcasting-datamodel" -version = "1.1.54" -description = "Data Model for the OCF nowcasting project" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "nowcasting_datamodel-1.1.54-py3-none-any.whl", hash = "sha256:90eac9c11fc9a06e63605087668da511c9d4c035c904c797fc0f6959c23f17ed"}, - {file = "nowcasting_datamodel-1.1.54.tar.gz", hash = "sha256:dccd72272f68e66cb7eab24be792ff1a8e81ca8de09cacc07e8c00152b4c7b78"}, -] - -[package.dependencies] -alembic = "*" -click = "*" -freezegun = "*" -numpy = "*" -pandas = "*" -psycopg2-binary = "*" -pydantic = "*" -sqlalchemy = "*" - [[package]] name = "numexpr" version = "2.8.4" @@ -3751,4 +3679,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d2eec64d23e764862d80e688fb91df645afb85349aea9c16cd2fdd85b9643836" +content-hash = "064a0152d86012ee67b0ff228e61d2c171c2faf5e7ce25765e09eadfa79912d8" diff --git a/pvconsumer/app.py b/pvconsumer/app.py index e5fc2de..6ab35bb 100644 --- a/pvconsumer/app.py +++ b/pvconsumer/app.py @@ -12,19 +12,17 @@ from typing import List, Optional, Tuple import click -from nowcasting_datamodel.connection import DatabaseConnection -from nowcasting_datamodel.models.base import Base_Forecast, Base_PV -from nowcasting_datamodel.models.pv import PVSystemSQL -from nowcasting_datamodel.read.read import update_latest_input_data_last_updated +import pandas as pd from pvoutput import PVOutput -from pvsite_datamodel.connection import DatabaseConnection as PVSiteDatabaseConnection +from pvsite_datamodel.connection import DatabaseConnection +from pvsite_datamodel.sqlmodels import SiteSQL from sqlalchemy.orm import Session import pvconsumer from pvconsumer.pv_systems import filter_pv_systems_which_have_new_data, get_pv_systems -from pvconsumer.save import save_to_database, save_to_pv_site_database +from pvconsumer.save import save_to_pv_site_database from pvconsumer.solar_sheffield_passiv import get_all_latest_pv_yield_from_solar_sheffield -from pvconsumer.utils import FakeDatabaseConnection, format_pv_data +from pvconsumer.utils import format_pv_data logging.basicConfig( level=getattr(logging, os.getenv("LOGLEVEL", "INFO")), @@ -37,20 +35,6 @@ @click.option( "--db-url", default=None, - envvar="DB_URL", - help="The Database URL where pv data will be saved", - type=click.STRING, -) -@click.option( - "--db-url-forecast", - default=None, - envvar="DB_URL_FORECAST", - help="The Database URL where update latest data will be saved", - type=click.STRING, -) -@click.option( - "--db-url-pv-site", - default=None, envvar="DB_URL_PV_SITE", help="The PV site Database URL where update latest data will be saved", type=click.STRING, @@ -71,8 +55,6 @@ ) def app( db_url: str, - db_url_forecast: str, - db_url_pv_site: Optional[str] = None, filename: Optional[str] = None, provider: str = "pvoutput.org", ): @@ -87,15 +69,9 @@ def app( logger.info(f"Running PV Consumer app ({pvconsumer.__version__})") - connection = DatabaseConnection(url=db_url, base=Base_PV, echo=False) - connection_forecast = DatabaseConnection(url=db_url_forecast, base=Base_Forecast, echo=False) - - if db_url_pv_site is not None: - connection_pv_site = PVSiteDatabaseConnection(url=db_url_pv_site, echo=False) - else: - connection_pv_site = FakeDatabaseConnection() + connection = DatabaseConnection(url=db_url, echo=False) - with connection.get_session() as session, connection_pv_site.get_session() as session_pv_site: + with connection.get_session() as session: # 1. Read list of PV systems (from local file) # and get their refresh times (refresh times can also be stored locally) logger.debug("Read list of PV systems (from local file)") @@ -107,27 +83,21 @@ def app( "Find most recent entered data (for each PV system) in OCF database," "and filter pv systems depending on refresh rate" ) - pv_systems = filter_pv_systems_which_have_new_data(pv_systems=pv_systems) + pv_systems = filter_pv_systems_which_have_new_data(pv_systems=pv_systems, session=session) # 3. Pull data pull_data_and_save( pv_systems=pv_systems, session=session, provider=provider, - session_pv_site=session_pv_site, ) - # update latest data - with connection_forecast.get_session() as session_forecast: - update_latest_input_data_last_updated(session=session_forecast, component="pv") - def pull_data_and_save( - pv_systems: List[PVSystemSQL], + pv_systems: List[SiteSQL], session: Session, provider: str, datetime_utc: Optional[None] = None, - session_pv_site: Optional[Session] = None, ): """ Pull the pv ield data and save to database @@ -157,15 +127,16 @@ def pull_data_and_save( n_pv_systems_per_batch = 50 pv_system_chunks = chunks(original_list=pv_systems, n=n_pv_systems_per_batch) - all_pv_yields_sql = [] + all_pv_yields_df = [] + i = 0 for pv_system_chunk in pv_system_chunks: - # get all the pv system ids from a a group of pv systems - pv_system_ids = [pv_system_id.pv_system_id for pv_system_id in pv_system_chunk] - if provider == "pvoutput.org": # set up pv output.org pv_output = PVOutput() + # get all the pv system ids from a a group of pv systems + pv_system_ids = [pv_system.client_site_id for pv_system in pv_system_chunk] + logger.debug(f"Getting data from {provider}") # lets take the date of the datetime now. @@ -185,45 +156,45 @@ def pull_data_and_save( else: raise Exception(f"Can not use provider {provider}") - for i, pv_system in enumerate(pv_system_chunk): + for _, pv_system in enumerate(pv_system_chunk): + i = i + 1 logger.debug( - f"Processing {i}th pv system ({pv_system.pv_system_id=}), " + f"Processing {i}th pv system ({pv_system.client_site_id=}), " f"out of {len(pv_systems)}" ) # take only the data we need for system id pv_yield_df = all_pv_yield_df[ - all_pv_yield_df["system_id"].astype(int) == pv_system.pv_system_id + all_pv_yield_df["system_id"].astype(int) == pv_system.client_site_id ] + pv_yield_df["site_uuid"] = pv_system.site_uuid logger.debug( f"Got {len(pv_yield_df)} pv yield for " - f"pv systems {pv_system.pv_system_id} before filtering" + f"pv systems {pv_system.client_site_id} before filtering" ) if len(pv_yield_df) == 0: - logger.warning(f"Did not find any data for {pv_system.pv_system_id} for {date}") + logger.warning(f"Did not find any data for {pv_system.client_site_id} for {date}") else: - pv_yields_sql = format_pv_data(pv_system=pv_system, pv_yield_df=pv_yield_df) + # filter out which is in our database and a funny 0 bug + pv_yield_df = format_pv_data( + pv_system=pv_system, pv_yield_df=pv_yield_df, session=session + ) - all_pv_yields_sql = all_pv_yields_sql + pv_yields_sql + if len(all_pv_yields_df) == 0: + all_pv_yields_df = pv_yield_df + else: + all_pv_yields_df = pd.concat([all_pv_yields_df, pv_yield_df]) - if len(all_pv_yields_sql) > 100: + if len(all_pv_yields_df) > 100: # 4. Save to database - perhaps check no duplicate data. (for each PV system) - save_to_database(session=session, pv_yields=all_pv_yields_sql) - all_pv_yields_sql = [] - - # 5. save to pv sites database - if session_pv_site is not None: - # TODO we are current doing this every round, - # we might find we have to batch it like the other save method - save_to_pv_site_database( - session=session_pv_site, pv_system=pv_system, pv_yield_df=pv_yield_df - ) - session_pv_site.commit() + save_to_pv_site_database(session=session, pv_yield_df=all_pv_yields_df) + all_pv_yields_df = [] # 4. Save to database - perhaps check no duplicate data. (for each PV system) - save_to_database(session=session, pv_yields=all_pv_yields_sql) + logger.debug(all_pv_yields_df) + save_to_pv_site_database(session=session, pv_yield_df=all_pv_yields_df) def chunks(original_list: List, n: int) -> Tuple[List]: diff --git a/pvconsumer/pv_systems.py b/pvconsumer/pv_systems.py index a8d7683..c98a4d3 100644 --- a/pvconsumer/pv_systems.py +++ b/pvconsumer/pv_systems.py @@ -1,24 +1,26 @@ """ PV system functions """ import logging import os -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from typing import List, Optional import pandas as pd -from nowcasting_datamodel.models.pv import PVSystem, PVSystemSQL, pv_output, solar_sheffield_passiv -from nowcasting_datamodel.read.read_pv import get_latest_pv_yield -from nowcasting_datamodel.read.read_pv import get_pv_systems as get_pv_systems_from_db from pvoutput import PVOutput +from pvsite_datamodel.read import get_all_sites +from pvsite_datamodel.sqlmodels import GenerationSQL, SiteSQL +from sqlalchemy import func from sqlalchemy.orm import Session import pvconsumer from pvconsumer.solar_sheffield_passiv import get_all_systems_from_solar_sheffield -from pvconsumer.utils import df_to_list_pv_system, list_pv_system_to_df +from pvconsumer.utils import pv_output, solar_sheffield_passiv + +# from pvconsumer.utils import df_to_list_pv_system, list_pv_system_to_df logger = logging.getLogger(__name__) -def load_pv_systems(provider: str = pv_output, filename: Optional[str] = None) -> List[PVSystem]: +def load_pv_systems(provider: str = pv_output, filename: Optional[str] = None) -> pd.DataFrame: """ Load pv systems from file @@ -36,25 +38,23 @@ def load_pv_systems(provider: str = pv_output, filename: Optional[str] = None) - logger.debug(f"Loading local pv systems from {filename}") - pv_capacity = pd.read_csv(filename) - - pv_systems = df_to_list_pv_system(pv_systems_df=pv_capacity) + pv_systems_df = pd.read_csv(filename, index_col=0) - return pv_systems + return pv_systems_df def find_missing_pv_systems( - pv_systems_local: List[PVSystem], - pv_systems_db: List[PVSystem], + pv_systems_local: pd.DataFrame, + pv_systems_db: pd.DataFrame, provider: str, -) -> List[PVSystem]: +) -> pd.DataFrame: """ Find missing pv systems Gte the pv systems that are in local file, but not in the database Args: - pv_systems_local: list of pv systems stored locally - pv_systems_db: list of pv systems from the database + pv_systems_local: dataframe with "pv_system_id" from local file + pv_systems_db: dataframe with "pv_system_id" from local db Returns: list of pv systems that are not in the database @@ -65,9 +65,11 @@ def find_missing_pv_systems( if len(pv_systems_db) == 0: return pv_systems_local - # change to dataframes - pv_systems_db = list_pv_system_to_df(pv_systems=pv_systems_db)[["pv_system_id"]] - pv_systems_local = list_pv_system_to_df(pv_systems=pv_systems_local)[["pv_system_id"]] + # get system ids + if "pv_system_id" not in pv_systems_db.columns: + pv_systems_db["pv_system_id"] = pv_systems_db["client_site_id"] + pv_systems_db = pv_systems_db[["pv_system_id"]] + pv_systems_local = pv_systems_local[["pv_system_id"]] # https://stackoverflow.com/questions/28901683/pandas-get-rows-which-are-not-in-other-dataframe # merge together @@ -79,12 +81,12 @@ def find_missing_pv_systems( pv_systems_missing = df_all[missing].copy() pv_systems_missing["provider"] = provider - return df_to_list_pv_system(pv_systems_missing) + return pv_systems_missing def get_pv_systems( session: Session, provider: str, filename: Optional[str] = None -) -> List[PVSystemSQL]: +) -> List[SiteSQL]: """ Get PV systems @@ -99,19 +101,19 @@ def get_pv_systems( """ # load all pv systems in database - pv_systems_sql_db: List[PVSystemSQL] = get_pv_systems_from_db( - provider=provider, session=session - ) + pv_systems_sql_db: List[SiteSQL] = get_all_sites(session=session) - pv_systems_db = [PVSystem.from_orm(pv_system) for pv_system in pv_systems_sql_db] + # convert to sql objects to Pandas datafraome + pv_systems_db_df = pd.DataFrame([pv_system.__dict__ for pv_system in pv_systems_sql_db]) # load master file - pv_system_local = load_pv_systems(filename=filename, provider=provider) + pv_system_local_df = load_pv_systems(filename=filename, provider=provider) # get missing pv systems missing_pv_system = find_missing_pv_systems( - pv_systems_local=pv_system_local, pv_systems_db=pv_systems_db, provider=provider + pv_systems_local=pv_system_local_df, pv_systems_db=pv_systems_db_df, provider=provider ) + logger.debug(missing_pv_system) logger.debug(f"There are {len(missing_pv_system)} pv systems to add to the database") @@ -123,7 +125,10 @@ def get_pv_systems( pv_systems = get_all_systems_from_solar_sheffield() else: raise Exception(f"Can not use provider {provider}") - for i, pv_system in enumerate(missing_pv_system): + + logger.debug(missing_pv_system) + for i, pv_system in missing_pv_system.iterrows(): + logger.debug(pv_system) # get metadata if provider == pv_output: metadata = pv_output_data.get_metadata( @@ -139,39 +144,42 @@ def get_pv_systems( pv_system.latitude = metadata.latitude pv_system.longitude = metadata.longitude pv_system.status_interval_minutes = int(metadata.status_interval_minutes) + pv_system.capacity_kw = metadata.system_DC_capacity_W / 1000 elif provider == solar_sheffield_passiv: - pv_system = [s for s in pv_systems if s.pv_system_id == pv_system.pv_system_id][0] + pv_system = pv_systems[pv_systems["pv_system_id"] == pv_system.pv_system_id].iloc[0] else: raise Exception(f"Can not use provider {provider}") - # validate - _ = PVSystem.from_orm(pv_system) + # get the current max ml id, small chance this could lead to a raise condition + max_ml_id = session.query(func.max(SiteSQL.ml_id)).scalar() + if max_ml_id is None: + max_ml_id = 0 + + site = SiteSQL( + client_site_id=str(pv_system.pv_system_id), + client_site_name=f"{provider}_{pv_system.pv_system_id}", + latitude=pv_system.latitude, + longitude=pv_system.longitude, + capacity_kw=pv_system.capacity_kw, + ml_id=max_ml_id + 1, + ) # add to database logger.debug(f"Adding pv system {pv_system.pv_system_id} to database") - session.add(pv_system.to_orm()) + session.add(site) # The first time we do this, we might hit a rate limit of 900, # therefore its good to save this on the go session.commit() - pv_systems = get_pv_systems_from_db(provider=provider, session=session) - - yesterday = datetime.now(timezone.utc).date() - timedelta(days=1) - pv_systems = get_latest_pv_yield( - session=session, - append_to_pv_systems=True, - pv_systems=pv_systems, - start_datetime_utc=yesterday, - start_created_utc=yesterday, - ) + pv_systems_sql_db: List[SiteSQL] = get_all_sites(session=session) - return pv_systems + return pv_systems_sql_db def filter_pv_systems_which_have_new_data( - pv_systems: List[PVSystemSQL], datetime_utc: Optional[datetime] = None + session: Session, pv_systems: List[SiteSQL], datetime_utc: Optional[datetime] = None ): """ Filter pv systems which have new data available @@ -186,6 +194,7 @@ def filter_pv_systems_which_have_new_data( Args: pv_systems: list of pv systems datetime_utc: the datetime now + session: database session Returns: list of pv systems that have new data. @@ -199,43 +208,62 @@ def filter_pv_systems_which_have_new_data( if datetime_utc is None: datetime_utc = datetime.utcnow() # add timezone + site_uuids = [pv_system.site_uuid for pv_system in pv_systems] + + # pull the latest data from the database + query = ( + session.query(SiteSQL.site_uuid, GenerationSQL.start_utc) + .distinct( + GenerationSQL.site_uuid, + # GenerationSQL.start_utc, + ) + .join(SiteSQL) + .filter( + GenerationSQL.start_utc <= datetime_utc, + GenerationSQL.start_utc >= datetime_utc - timedelta(days=1), + GenerationSQL.site_uuid.in_(site_uuids), + ) + .order_by( + GenerationSQL.site_uuid, + GenerationSQL.start_utc, + GenerationSQL.created_utc.desc(), + ) + ) + last_generations = query.all() + last_generations = {row[0]: row[1] for row in last_generations} + keep_pv_systems = [] for i, pv_system in enumerate(pv_systems): logger.debug(f"Looking at {i}th pv system, out of {len(pv_systems)} pv systems") - last_pv_yield = pv_system.last_pv_yield + if pv_system.site_uuid in last_generations: + last_datetime = last_generations[pv_system.site_uuid] + else: + last_datetime = None - if pv_system.status_interval_minutes is None: - # don't know the status interval refresh time, so lets keep it - logger.debug( - f"We dont know the refresh time for pv systems {pv_system.pv_system_id}, " - f"so will be getting data " - ) - keep_pv_systems.append(pv_system) - elif last_pv_yield is None: + if last_datetime is None: # there is no pv yield data for this pv system, so lets keep it logger.debug( - f"There is no pv yield data for pv systems {pv_system.pv_system_id}, " + f"There is no pv yield data for pv systems {pv_system.site_uuid}, " f"so will be getting data " ) keep_pv_systems.append(pv_system) else: - next_datetime_data_available = ( - timedelta(minutes=pv_system.status_interval_minutes) + last_pv_yield.datetime_utc - ) + next_datetime_data_available = timedelta(minutes=5) + last_datetime + logger.debug(next_datetime_data_available) if next_datetime_data_available < datetime_utc: logger.debug( - f"For pv system {pv_system.pv_system_id} as " - f"last pv yield datetime is {last_pv_yield.datetime_utc}," - f"refresh interval is {pv_system.status_interval_minutes}, " - f"so will be getting data" + f"For pv system {pv_system.site_uuid} as " + f"last pv yield datetime is {last_datetime}," + f"refresh interval is 5 minutes, " + f"so will be getting data, {next_datetime_data_available=}" ) keep_pv_systems.append(pv_system) else: logger.debug( - f"Not keeping pv system {pv_system.pv_system_id} as " - f"last pv yield datetime is {last_pv_yield.datetime_utc}," - f"refresh interval is {pv_system.status_interval_minutes}" + f"Not keeping pv system {pv_system.site_uuid} as " + f"last pv yield datetime is {last_datetime}," + f"refresh interval is 5 minutes" ) return keep_pv_systems diff --git a/pvconsumer/save.py b/pvconsumer/save.py index 8a546c1..4ea6e06 100644 --- a/pvconsumer/save.py +++ b/pvconsumer/save.py @@ -1,30 +1,14 @@ """ Save to database functions""" import logging -from typing import List import pandas as pd -from nowcasting_datamodel.models import PVSystem, PVYield -from pvsite_datamodel.read.site import get_site_by_client_site_id from pvsite_datamodel.write.generation import insert_generation_values from sqlalchemy.orm import Session logger = logging.getLogger(__name__) -def save_to_database(session: Session, pv_yields: List[PVYield]): - """ - Save pv data to database - - :param session: database session - :param pv_yields: list of pv data - """ - logger.debug(f"Will be adding {len(pv_yields)} pv yield object to database") - - session.add_all(pv_yields) - session.commit() - - -def save_to_pv_site_database(session: Session, pv_system: PVSystem, pv_yield_df: pd.DataFrame): +def save_to_pv_site_database(session: Session, pv_yield_df: pd.DataFrame): """ Save to pv site database @@ -41,15 +25,17 @@ def save_to_pv_site_database(session: Session, pv_system: PVSystem, pv_yield_df: if len(pv_yield_df) == 0: return - # get site from the pv_system - site = get_site_by_client_site_id( - session=session, - client_name=pv_system.provider, - client_site_id=pv_system.pv_system_id, - ) - # format dataframe - pv_yield_df["site_uuid"] = site.site_uuid + if "instantaneous_power_gen_W" in pv_yield_df.columns: + pv_yield_df["solar_generation_kw"] = pv_yield_df["instantaneous_power_gen_W"] / 1000 + if "datetime_utc" not in pv_yield_df.columns: + pv_yield_df.rename( + columns={ + "datetime": "datetime_utc", + }, + inplace=True, + ) + pv_yield_df["power_kw"] = pv_yield_df["solar_generation_kw"] pv_yield_df["end_utc"] = pv_yield_df["datetime_utc"] # TODO this is hard coded for Sheffield Solar Passiv @@ -58,3 +44,4 @@ def save_to_pv_site_database(session: Session, pv_system: PVSystem, pv_yield_df: # save to database logger.debug(f"Inserting {len(pv_yield_df)} records to pv site database") insert_generation_values(session, pv_yield_df) + session.commit() diff --git a/pvconsumer/solar_sheffield_passiv.py b/pvconsumer/solar_sheffield_passiv.py index 4ef5a5c..c3df4fb 100644 --- a/pvconsumer/solar_sheffield_passiv.py +++ b/pvconsumer/solar_sheffield_passiv.py @@ -7,9 +7,6 @@ import pandas as pd import requests -from nowcasting_datamodel.models.pv import PVSystem - -from pvconsumer.utils import df_to_list_pv_system logger = logging.getLogger(__name__) @@ -31,7 +28,7 @@ def raw_to_dataframe(response): return pd.DataFrame(data=data, columns=columns) -def get_all_systems_from_solar_sheffield(pv_system_ids: List[int] = None) -> List[PVSystem]: +def get_all_systems_from_solar_sheffield(pv_system_ids: List[int] = None) -> pd.DataFrame: """ Get the pv systesm from solar sheffield @@ -47,7 +44,7 @@ def get_all_systems_from_solar_sheffield(pv_system_ids: List[int] = None) -> Lis data_df = raw_to_dataframe(response=response) data_df.rename(columns={"system_id": "pv_system_id"}, inplace=True) - data_df.rename(columns={"kWp": "installed_capacity_kw"}, inplace=True) + data_df.rename(columns={"kWp": "capacity_kw"}, inplace=True) data_df.rename(columns={"longitude_rounded": "longitude"}, inplace=True) data_df.rename(columns={"latitude_rounded": "latitude"}, inplace=True) @@ -71,9 +68,7 @@ def get_all_systems_from_solar_sheffield(pv_system_ids: List[int] = None) -> Lis data_df = data_df[data_df["pv_system_id"].isin(pv_system_ids)] # reformat - pv_systems = df_to_list_pv_system(data_df) - - return pv_systems + return data_df def get_all_latest_pv_yield_from_solar_sheffield() -> pd.DataFrame: @@ -114,4 +109,7 @@ def get_all_latest_pv_yield_from_solar_sheffield() -> pd.DataFrame: # add timestamp UTC data_df["datetime_utc"] = data_df["datetime_utc"].dt.tz_localize("UTC") + # only take Passiv data + data_df = data_df[data_df["owner_name"] == "Passiv"] + return data_df diff --git a/pvconsumer/utils.py b/pvconsumer/utils.py index 5aeb2d9..3bc6e3c 100644 --- a/pvconsumer/utils.py +++ b/pvconsumer/utils.py @@ -1,49 +1,21 @@ """ Utils functions """ import logging -from datetime import timezone -from typing import List +from datetime import datetime, timedelta, timezone import pandas as pd -from nowcasting_datamodel.models import PVSystem, PVSystemSQL, PVYield, PVYieldSQL +from pvsite_datamodel.sqlmodels import GenerationSQL, SiteSQL from sqlalchemy.orm import Session +# logger = logging.getLogger(__name__) -def list_pv_system_to_df(pv_systems: List[PVSystem]) -> pd.DataFrame: - """ - Change list of pv systems to dataframe - - Args: - pv_systems: list of pv systems (pdyantic objects) - - Returns: dataframe with columns the same as the pv systems pydantic object - - """ - return pd.DataFrame([pv_system.dict() for pv_system in pv_systems]) - - -def df_to_list_pv_system(pv_systems_df=pd.DataFrame) -> List[PVSystem]: - """ - Change dataframe to lsit of pv systems - - Args: - pv_systems_df: dataframe with columns the same as the pv systems pydantic object - - Returns: list of pv systems - - """ - return [PVSystem(**row) for row in pv_systems_df.to_dict(orient="records")] - - -def format_pv_data(pv_system: PVSystemSQL, pv_yield_df: pd.DataFrame) -> List[PVYieldSQL]: +def format_pv_data(pv_system: SiteSQL, pv_yield_df: pd.DataFrame, session: Session) -> pd.DataFrame: """ Format the pv data 1. get rid of 0 bug 2. remove data if already in our database - 3. format in to PVYield objects - 4. convert to SQL objects :param pv_system: the pv system this data is about :param pv_yield_df: the pv yield data with columns 'instantaneous_power_gen_W' and 'datetime' @@ -73,14 +45,29 @@ def format_pv_data(pv_system: PVSystemSQL, pv_yield_df: pd.DataFrame) -> List[PV ): logger.debug( f"Dropping last row of pv data for " - f"{pv_system.pv_system_id} " + f"{pv_system.client_site_id} " f"as last row is 0, but the second to last row is not." ) pv_yield_df.drop(pv_yield_df.tail(1).index, inplace=True) # 2. filter by last - if pv_system.last_pv_yield is not None: - last_pv_yield_datetime = pv_system.last_pv_yield.datetime_utc.replace(tzinfo=timezone.utc) + if len(pv_yield_df) > 0: + start_utc_filter = pv_yield_df["datetime_utc"].min() - timedelta(days=1) + else: + start_utc_filter = datetime.now() - timedelta(days=1) + + last_pv_generation = ( + session.query(GenerationSQL) + .filter(GenerationSQL.site_uuid == pv_system.site_uuid) + .join(SiteSQL) + .filter(SiteSQL.site_uuid == pv_system.site_uuid) + .filter(GenerationSQL.start_utc > start_utc_filter) + .order_by(GenerationSQL.created_utc.desc()) + .first() + ) + + if last_pv_generation is not None: + last_pv_yield_datetime = last_pv_generation.start_utc.replace(tzinfo=timezone.utc) pv_yield_df = pv_yield_df[pv_yield_df["datetime_utc"] > last_pv_yield_datetime] @@ -92,48 +79,11 @@ def format_pv_data(pv_system: PVSystemSQL, pv_yield_df: pd.DataFrame) -> List[PV logger.debug(pv_yield_df) else: logger.debug( - f"This is the first lot pv yield data for " f"pv system {(pv_system.pv_system_id)}" + f"This is the first lot pv yield data for pv system {(pv_system.client_site_id)}" ) - # 3. format in to PVYield objects - # need columns datetime_utc, solar_generation_kw - pv_yield_df = pv_yield_df[["solar_generation_kw", "datetime_utc"]] - - # change to list of pydantic objects - pv_yields = [PVYield(**row) for row in pv_yield_df.to_dict(orient="records")] - # 4. change to sqlalamcy objects and add pv systems - pv_yields_sql = [pv_yield.to_orm() for pv_yield in pv_yields] - for pv_yield_sql in pv_yields_sql: - pv_yield_sql.pv_system = pv_system - logger.debug(f"Found {len(pv_yields_sql)} pv yield for pv systems {pv_system.pv_system_id}") - - return pv_yields_sql - - -class FakeDatabaseConnection: - """Fake Database connection class""" - - def __init__(self): - """ - Set up fake database connection - - This is so we can still do - 'with connection.get_session() as sessions:' - bu session is None - """ - - class FakeSession: - def __init__(self): # noqa - pass - - def __enter__(self): # noqa - return None - - def __exit__(self, type, value, traceback): # noqa - pass + return pv_yield_df - self.Session = FakeSession - def get_session(self) -> Session: - """Get sqlalamcy session""" - return self.Session() +pv_output = "pvoutput.org" +solar_sheffield_passiv = "solar_sheffield_passiv" diff --git a/pyproject.toml b/pyproject.toml index cc5496b..f6e64d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ pvoutput-ocf = "0.1.17" click = "^8.1.3" requests = "^2.28.2" pandas = "^1.5.3" -nowcasting-datamodel = "1.1.54" pvsite-datamodel = "^0.1.29" pydantic = "1.10.5" diff --git a/tests/conftest.py b/tests/conftest.py index 749779c..58aee05 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,8 +3,9 @@ import pandas as pd import pytest -from nowcasting_datamodel.connection import DatabaseConnection -from nowcasting_datamodel.models.base import Base_Forecast, Base_PV +from pvsite_datamodel.connection import DatabaseConnection + +# from nowcasting_datamodel.connection import DatabaseConnection from pvsite_datamodel.sqlmodels import Base, GenerationSQL, SiteSQL from testcontainers.postgres import PostgresContainer @@ -34,13 +35,7 @@ def postgresql_database(bases: list): @pytest.fixture def db_connection(scope="session"): - with postgresql_database([Base_PV, Base]) as db_conn: - yield db_conn - - -@pytest.fixture(scope="session") -def db_connection_forecast(): - with postgresql_database([Base_Forecast]) as db_conn: + with postgresql_database([Base]) as db_conn: yield db_conn diff --git a/tests/intergration/test_app.py b/tests/intergration/test_app.py index de4df8c..27a648e 100644 --- a/tests/intergration/test_app.py +++ b/tests/intergration/test_app.py @@ -1,35 +1,24 @@ from click.testing import CliRunner -from nowcasting_datamodel.models.pv import PVSystem, PVSystemSQL, PVYieldSQL -from pvsite_datamodel.sqlmodels import GenerationSQL +from pvsite_datamodel.sqlmodels import GenerationSQL, SiteSQL from pvconsumer.app import app, pull_data_and_save def test_pull_data(db_session, sites): - pv_systems = [ - PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm(), - ] - pv_systems[0].last_pv_yield = None + pull_data_and_save(pv_systems=sites, session=db_session, provider="pvoutput.org") - pull_data_and_save(pv_systems=pv_systems, session=db_session, provider="pvoutput.org") - - pv_yields = db_session.query(PVYieldSQL).all() + pv_yields = db_session.query(GenerationSQL).all() assert len(pv_yields) > 0 def test_pull_data_solar_sheffield(db_session, sites): - pv_systems = [ - PVSystem(pv_system_id=4383, provider="solar_sheffield_passiv").to_orm(), - ] - pv_systems[0].last_pv_yield = None - - pull_data_and_save(pv_systems=pv_systems, session=db_session, provider="solar_sheffield_passiv") + pull_data_and_save(pv_systems=sites, session=db_session, provider="solar_sheffield_passiv") - pv_yields = db_session.query(PVYieldSQL).all() + pv_yields = db_session.query(GenerationSQL).all() assert len(pv_yields) > 0 -def test_app(db_connection, db_connection_forecast, filename, sites): +def test_app(db_connection, filename, sites): runner = CliRunner() response = runner.invoke( app, @@ -38,24 +27,21 @@ def test_app(db_connection, db_connection_forecast, filename, sites): db_connection.url, "--filename", filename, - "--db-url-forecast", - db_connection_forecast.url, ], ) assert response.exit_code == 0, response.exception with db_connection.get_session() as session: - pv_systems = session.query(PVSystemSQL).all() - _ = PVSystem.from_orm(pv_systems[0]) - assert len(pv_systems) == 20 + pv_systems = session.query(SiteSQL).all() + assert len(pv_systems) == 30 - pv_yields = session.query(PVYieldSQL).all() + pv_yields = session.query(GenerationSQL).all() assert len(pv_yields) > 7 # the app gets multiple values for each pv system. # There is a chance this will fail in the early morning when no data is available -def test_app_ss(db_connection, db_connection_forecast, filename_solar_sheffield, sites): +def test_app_ss(db_connection, filename_solar_sheffield, sites): runner = CliRunner() response = runner.invoke( app, @@ -64,10 +50,6 @@ def test_app_ss(db_connection, db_connection_forecast, filename_solar_sheffield, db_connection.url, "--filename", filename_solar_sheffield, - "--db-url-forecast", - db_connection_forecast.url, - "--db-url-pv-site", - db_connection.url, "--provider", "solar_sheffield_passiv", ], @@ -75,12 +57,8 @@ def test_app_ss(db_connection, db_connection_forecast, filename_solar_sheffield, assert response.exit_code == 0, response.exception with db_connection.get_session() as session: - pv_systems = session.query(PVSystemSQL).all() - _ = PVSystem.from_orm(pv_systems[0]) - assert len(pv_systems) == 10 - - pv_yields = session.query(PVYieldSQL).all() - assert len(pv_yields) >= 9 + pv_systems = session.query(SiteSQL).all() + assert len(pv_systems) == 30 # the app gets multiple values for each pv system. # There is a chance this will fail in the early morning when no data is available diff --git a/tests/intergration/test_ss_passiv.py b/tests/intergration/test_ss_passiv.py index c4cb5bf..dd6c85e 100644 --- a/tests/intergration/test_ss_passiv.py +++ b/tests/intergration/test_ss_passiv.py @@ -1,9 +1,26 @@ from datetime import datetime, timedelta, timezone +import pytest + +from pvconsumer.pv_systems import get_pv_systems from pvconsumer.solar_sheffield_passiv import ( get_all_latest_pv_yield_from_solar_sheffield, get_all_systems_from_solar_sheffield, ) +from pvconsumer.utils import solar_sheffield_passiv + + +def test_get_pv_systems_ss(db_session, filename): + pv_systems = get_pv_systems( + session=db_session, filename=filename, provider=solar_sheffield_passiv + ) + + assert len(pv_systems) > 0 + + +def test_test_get_pv_systems_error(db_session, filename): + with pytest.raises(Exception): + _ = get_pv_systems(session=db_session, filename=filename, provider="fake") def test_get_all_systems(): @@ -12,7 +29,7 @@ def test_get_all_systems(): # these numbers seem to change over time assert len(pv_systems) >= 56824 assert len(pv_systems) <= 57200 - assert pv_systems[0].installed_capacity_kw is not None + assert pv_systems.iloc[0].capacity_kw is not None def test_get_all_systems_filter(): diff --git a/tests/unittest/test_pv_systems.py b/tests/unittest/test_pv_systems.py index b6fd999..cfd4d2e 100644 --- a/tests/unittest/test_pv_systems.py +++ b/tests/unittest/test_pv_systems.py @@ -1,14 +1,15 @@ -from datetime import datetime, timezone -from typing import List +import uuid +from datetime import datetime -from nowcasting_datamodel.models.pv import PVSystem, PVSystemSQL, PVYield, solar_sheffield_passiv -from nowcasting_datamodel.read.read_pv import get_latest_pv_yield +import pandas as pd +from pvsite_datamodel.sqlmodels import GenerationSQL, SiteSQL from pvconsumer.pv_systems import ( filter_pv_systems_which_have_new_data, find_missing_pv_systems, load_pv_systems, ) +from pvconsumer.utils import solar_sheffield_passiv def test_load_pv_systems(): @@ -20,15 +21,19 @@ def test_load_pv_systems_passiv(): def test_find_missing_pv_systems(): - pv_systems_local = [ - PVSystem(pv_system_id=1, provider="pvoutput.org"), - PVSystem(pv_system_id=2, provider="pvoutput.org"), - PVSystem(pv_system_id=3, provider="pvoutput.org"), - ] + pv_systems_local = pd.DataFrame( + [ + dict(pv_system_id=1, provider="pvoutput.org"), + dict(pv_system_id=2, provider="pvoutput.org"), + dict(pv_system_id=3, provider="pvoutput.org"), + ] + ) - pv_systems_db = [ - PVSystem(pv_system_id=1, provider="pvoutput.org"), - ] + pv_systems_db = pd.DataFrame( + [ + dict(client_site_id=1, provider="pvoutput.org"), + ] + ) pv_systems_missing = find_missing_pv_systems( pv_systems_local=pv_systems_local, @@ -39,72 +44,49 @@ def test_find_missing_pv_systems(): assert len(pv_systems_missing) == 2 -def test_filter_pv_systems_which_have_new_data_no_refresh_interval(db_session): +def test_filter_pv_systems_which_have_no_datal(db_session): pv_systems = [ - PVSystem(pv_system_id=1, provider="pvoutput.org").to_orm(), - PVSystem(pv_system_id=2, provider="pvoutput.org").to_orm(), - PVSystem(pv_system_id=3, provider="pvoutput.org").to_orm(), + SiteSQL(site_uuid=uuid.uuid4()), + SiteSQL(site_uuid=uuid.uuid4()), + SiteSQL(site_uuid=uuid.uuid4()), ] - pv_systems = get_latest_pv_yield( - session=db_session, pv_systems=pv_systems, append_to_pv_systems=True + pv_systems_keep = filter_pv_systems_which_have_new_data( + pv_systems=pv_systems, session=db_session ) - pv_systems_keep = filter_pv_systems_which_have_new_data(pv_systems=pv_systems) - assert len(pv_systems_keep) == 3 -def test_filter_pv_systems_which_have_new_data_no_data(db_session): - pv_systems = [ - PVSystem(pv_system_id=1, provider="pvoutput.org", status_interval_minutes=5).to_orm(), - PVSystem(pv_system_id=2, provider="pvoutput.org", status_interval_minutes=5).to_orm(), - PVSystem(pv_system_id=3, provider="pvoutput.org", status_interval_minutes=5).to_orm(), - ] - - pv_systems = get_latest_pv_yield( - session=db_session, pv_systems=pv_systems, append_to_pv_systems=True +def test_filter_pv_systems_which_have_new_data(db_session, sites): + pv_yield_0 = GenerationSQL( + start_utc=datetime(2022, 1, 1), end_utc=datetime(2022, 1, 1, 0, 5), generation_power_kw=1 + ) + pv_yield_1 = GenerationSQL( + start_utc=datetime(2022, 1, 1, 0, 4), + end_utc=datetime(2022, 1, 1, 0, 5), + generation_power_kw=3, ) - pv_systems_keep = filter_pv_systems_which_have_new_data(pv_systems=pv_systems) - - assert len(pv_systems_keep) == 3 - - -def test_filter_pv_systems_which_have_new_data(db_session): - pv_yield_0 = PVYield(datetime_utc=datetime(2022, 1, 1), solar_generation_kw=1).to_orm() - pv_yield_1 = PVYield(datetime_utc=datetime(2022, 1, 1), solar_generation_kw=2).to_orm() - pv_yield_2 = PVYield(datetime_utc=datetime(2022, 1, 1, 0, 4), solar_generation_kw=3).to_orm() - - pv_systems = [ - PVSystem(pv_system_id=1, provider="pvoutput.org", status_interval_minutes=4).to_orm(), - PVSystem(pv_system_id=2, provider="pvoutput.org", status_interval_minutes=1).to_orm(), - PVSystem(pv_system_id=3, provider="pvoutput.org", status_interval_minutes=5).to_orm(), - ] - - pv_yield_0.pv_system = pv_systems[0] - pv_yield_1.pv_system = pv_systems[1] - pv_yield_2.pv_system = pv_systems[2] + pv_yield_0.site = sites[0] + pv_yield_1.site = sites[1] - db_session.add_all([pv_yield_0, pv_yield_1, pv_yield_2]) - db_session.add_all(pv_systems) + db_session.add_all([pv_yield_0, pv_yield_1]) + db_session.commit() - pv_systems: List[PVSystemSQL] = db_session.query(PVSystemSQL).all() - pv_systems = get_latest_pv_yield( - session=db_session, pv_systems=pv_systems, append_to_pv_systems=True - ) + assert len(sites) == 30 # - # | last data | refresh | keep? - # 1 | 5 mins | 4 mins | True - # 2 | 5 mins | 1 mins | True - # 3 | 1 mins | 5 mins | False + # | last data | keep? + # 1 | 6 mins | True + # 2 | 2 mins | False pv_systems_keep = filter_pv_systems_which_have_new_data( - pv_systems=pv_systems, - datetime_utc=datetime(2022, 1, 1, 0, 5, tzinfo=timezone.utc), + pv_systems=sites, + session=db_session, + datetime_utc=datetime(2022, 1, 1, 0, 6), ) - assert len(pv_systems_keep) == 2 - assert pv_systems_keep[0].id == 1 - assert pv_systems_keep[1].id == 2 + assert len(pv_systems_keep) == 29 + assert pv_systems_keep[0].site_uuid == sites[0].site_uuid + assert pv_systems_keep[1].site_uuid == sites[2].site_uuid diff --git a/tests/unittest/test_utils.py b/tests/unittest/test_utils.py index a8cef87..dd9602c 100644 --- a/tests/unittest/test_utils.py +++ b/tests/unittest/test_utils.py @@ -1,57 +1,34 @@ from datetime import datetime, timezone import pandas as pd -from nowcasting_datamodel.fake import make_fake_pv_system -from nowcasting_datamodel.models.pv import PVSystem, PVYield +from pvsite_datamodel.sqlmodels import GenerationSQL -from pvconsumer.utils import df_to_list_pv_system, format_pv_data, list_pv_system_to_df +from pvconsumer.utils import format_pv_data -def test_list_pv_system_to_df(): - pv_systems_1 = PVSystem.from_orm(make_fake_pv_system()) - pv_systems_2 = PVSystem.from_orm(make_fake_pv_system()) - - _ = list_pv_system_to_df([pv_systems_1, pv_systems_2]) - - -def test_df_to_list_pv_system(): - pv_systems_1 = PVSystem.from_orm(make_fake_pv_system()) - pv_systems_2 = PVSystem.from_orm(make_fake_pv_system()) - - df = list_pv_system_to_df([pv_systems_1, pv_systems_2]) - _ = df_to_list_pv_system(df) - - -def test_pv_yield_df_no_data(): - pv_systems = [ - PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm(), - ] - pv_systems[0].last_pv_yield = None - +def test_pv_yield_df_no_data(db_session, sites): pv_yield_df = pd.DataFrame(columns=["instantaneous_power_gen_W", "datetime"]) - pv_yields = format_pv_data(pv_system=pv_systems[0], pv_yield_df=pv_yield_df) + pv_yields = format_pv_data(pv_system=sites[0], pv_yield_df=pv_yield_df, session=db_session) assert len(pv_yields) == 0 -def test_pv_yield_df(): - pv_system = PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm() - pv_system.last_pv_yield = None - +def test_pv_yield_df(sites, db_session): pv_yield_df = pd.DataFrame( columns=["instantaneous_power_gen_W", "datetime"], data=[[1, datetime(2022, 1, 1)]] ) - pv_yields = format_pv_data(pv_system=pv_system, pv_yield_df=pv_yield_df) + pv_yields = format_pv_data(pv_system=sites[0], pv_yield_df=pv_yield_df, session=db_session) assert len(pv_yields) == 1 - assert pv_yields[0].solar_generation_kw == 1 / 1000 - + assert pv_yields.iloc[0].solar_generation_kw == 1 / 1000 -def test_pv_yield_df_last_pv_yield(): - pv_system = PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm() - last_pv_yield = PVYield(datetime_utc=datetime(2022, 1, 1), solar_generation_kw=10) - pv_system.last_pv_yield = last_pv_yield +def test_pv_yield_df_last_pv_yield(sites, db_session): + last_pv_yield = GenerationSQL( + start_utc=datetime(2022, 1, 1), end_utc=datetime(2022, 1, 1), generation_power_kw=10 + ) + last_pv_yield.site = sites[0] + db_session.add(last_pv_yield) pv_yield_df = pd.DataFrame( columns=["instantaneous_power_gen_W", "datetime"], @@ -61,14 +38,18 @@ def test_pv_yield_df_last_pv_yield(): ], ) - pv_yields = format_pv_data(pv_system=pv_system, pv_yield_df=pv_yield_df) + pv_yields = format_pv_data(pv_system=sites[0], pv_yield_df=pv_yield_df, session=db_session) assert len(pv_yields) == 1 - assert pv_yields[0].solar_generation_kw == 2 / 1000 + assert pv_yields.iloc[0].solar_generation_kw == 2 / 1000 -def test_pv_yield_df_0_bug(): - pv_system = PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm() - pv_system.last_pv_yield = None +# +def test_pv_yield_df_0_bug(sites, db_session): + last_pv_yield = GenerationSQL( + start_utc=datetime(2021, 1, 1), end_utc=datetime(2021, 1, 1), generation_power_kw=10 + ) + last_pv_yield.site = sites[0] + db_session.add(last_pv_yield) pv_yield_df = pd.DataFrame( columns=["instantaneous_power_gen_W", "datetime"], @@ -78,15 +59,12 @@ def test_pv_yield_df_0_bug(): ], ) - pv_yields = format_pv_data(pv_system=pv_system, pv_yield_df=pv_yield_df) + pv_yields = format_pv_data(pv_system=sites[0], pv_yield_df=pv_yield_df, session=db_session) assert len(pv_yields) == 1 - assert pv_yields[0].solar_generation_kw == 1 / 1000 - + assert pv_yields.iloc[0].solar_generation_kw == 1 / 1000 -def test_pv_yield_df_zeros(): - pv_system = PVSystem(pv_system_id=10020, provider="pvoutput.org").to_orm() - pv_system.last_pv_yield = None +def test_pv_yield_df_zeros(sites, db_session): pv_yield_df = pd.DataFrame( columns=["instantaneous_power_gen_W", "datetime"], data=[ @@ -95,6 +73,6 @@ def test_pv_yield_df_zeros(): ], ) - pv_yields = format_pv_data(pv_system=pv_system, pv_yield_df=pv_yield_df) + pv_yields = format_pv_data(pv_system=sites[0], pv_yield_df=pv_yield_df, session=db_session) assert len(pv_yields) == 2 - assert pv_yields[0].solar_generation_kw == 0 + assert pv_yields.iloc[0].solar_generation_kw == 0