diff --git a/notebooks/Exploring_API_Functions_Authentication.ipynb b/notebooks/Exploring_API_Functions_Authentication.ipynb index bb03e1c..85c023a 100644 --- a/notebooks/Exploring_API_Functions_Authentication.ipynb +++ b/notebooks/Exploring_API_Functions_Authentication.ipynb @@ -250,23 +250,6 @@ "opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# History data\n", - "inverters_hist_df = opc_data.get_historical_aggregated_values(\n", - " start_time=datetime.datetime.now() - datetime.timedelta(2),\n", - " end_time=datetime.datetime.now() - datetime.timedelta(1),\n", - " pro_interval=600000,\n", - " agg_name=\"Average\",\n", - " variable_list=inverters.variables_as_list([\"DCPower\"]),\n", - ")\n", - "inverters_hist_df" - ] - }, { "cell_type": "code", "execution_count": null, @@ -286,32 +269,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Historic value data of trackers, 1 days worth of data 30 days ago\n", - "one_day_historic_tracker_data = opc_data.get_historical_aggregated_values(\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", - " pro_interval=3600000,\n", - " agg_name=\"Average\",\n", - " variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"]),\n", - ")\n", - "one_day_historic_tracker_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical inverter data in asyncio process\n", - "one_days_historic_inverter_data2 = await opc_data.get_historical_aggregated_values_async(\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", - " pro_interval=60*1000,\n", - " agg_name=\"Average\",\n", + "# 1 day raw historical data\n", + "one_day_raw_historical_data = await opc_data.get_raw_historical_values_async(\n", + " start_time = datetime.datetime(2023, 11, 13, 00, 00),\n", + " end_time = datetime.datetime(2023, 11, 13, 23, 59),\n", " variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"])\n", ")\n", - "one_days_historic_inverter_data2" + "one_day_raw_historical_data" ] }, { @@ -321,9 +285,9 @@ "outputs": [], "source": [ "# 1 day aggregated historical data\n", - "one_day_historical_data = await opc_data.get_historical_aggregated_values_batch_time_vars_async(\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", + "one_day_historical_data = await opc_data.get_historical_aggregated_values_async(\n", + " start_time = datetime.datetime(2023, 11, 13, 00, 00),\n", + " end_time = datetime.datetime(2023, 11, 13, 23, 59),\n", " pro_interval=60*1000,\n", " agg_name=\"Average\",\n", " variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"])\n", diff --git a/notebooks/api_performance_testing.ipynb b/notebooks/api_performance_testing.ipynb new file mode 100644 index 0000000..89517a2 --- /dev/null +++ b/notebooks/api_performance_testing.ipynb @@ -0,0 +1,2189 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook explores both model index and opc ua scripts and contain examples of all the functions to make request to model index api and opc ua api 
servers. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the required packeages\n", + "import pandas as pd\n", + "import os\n", + "import json\n", + "import datetime\n", + "import concurrent.futures\n", + "from dotenv import load_dotenv\n", + "from pathlib import Path\n", + "from dateutil.relativedelta import relativedelta" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Scripts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import model index functions\n", + "from pyprediktormapclient.model_index import ModelIndex\n", + "\n", + "# Import OPC UA functions\n", + "from pyprediktormapclient.opc_ua import OPC_UA\n", + "\n", + "# Import Analytics Helper\n", + "from pyprediktormapclient.analytics_helper import AnalyticsHelper\n", + "\n", + "# Import \"Dataframer\" Tools\n", + "from pyprediktormapclient.shared import *\n", + "\n", + "# import AUTH_CLIENT\n", + "from pyprediktormapclient.auth_client import AUTH_CLIENT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Consider obtaining the envrionment variables from .env file if you are running this locally from source.\n", + "dotenv_path = Path(\".env\")\n", + "load_dotenv(dotenv_path=dotenv_path)\n", + "\n", + "username = os.environ[\"USERNAME\"]\n", + "password = os.environ[\"PASSWORD\"]\n", + "opcua_rest_url = os.environ[\"OPC_UA_REST_URL\"]\n", + "opcua_server_url = os.environ[\"OPC_UA_SERVER_URL\"]\n", + "model_index_url = os.environ[\"MODEL_INDEX_URL\"]\n", + "ory_url = os.environ[\"ORY_URL\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Getting ory bearer token\n", + "auth_client = AUTH_CLIENT(rest_url=ory_url, username=username, password=password)\n", + "auth_client.request_new_ory_token()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Connecting to ModelIndex APIs \n", + "model = ModelIndex(url=model_index_url, auth_client=auth_client, session=auth_client.session)\n", + "\n", + "# Listed sites on the model index api server\n", + "namespaces = model.get_namespace_array()\n", + "# Types of Objects\n", + "object_types_json = model.get_object_types()\n", + "object_types = AnalyticsHelper(object_types_json)\n", + "namespace_list = object_types.namespaces_as_list(namespaces)\n", + "\n", + "# Initate the OPC UA API with a fixed namespace list\n", + "opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download data from modelindex api" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Unique types of Objects\n", + "object_types_unique = object_types.dataframe[[\"Id\", \"Name\"]].drop_duplicates()\n", + "object_types_unique" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To get the objects of a type\n", + "sites_json = model.get_objects_of_type(\"SiteType\")\n", + "\n", + "# Send the returned JSON into a 
normalizer to get Id, Type, Name, Props and Vars as columns\n", + "sites = AnalyticsHelper(sites_json)\n", + "sites.list_of_names()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Analytics helper\n", + "sites.variables_as_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sites.list_of_ids()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Selecting the second site\n", + "first_site_id = sites.list_of_ids()[0]\n", + "# first_site_id = '14:1:BE.DK-ADU'\n", + "first_site_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all stringsets for one park\n", + "string_sets_for_first_park_as_json = model.get_object_descendants(\n", + " \"StringSetType\", [first_site_id], \"PV_Assets\"\n", + ")\n", + "string_sets = AnalyticsHelper(string_sets_for_first_park_as_json)\n", + "string_sets.dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Query Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "variable_list =string_sets.variables_as_list([\"DCPower\"])\n", + "start_time = datetime.datetime(2023, 11, 13, 00, 00)\n", + "end_time = datetime.datetime(2023, 11, 13, 23, 59)\n", + "pro_interval=60*1000\n", + "agg_name=\"Average\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batching with Async Refactoring" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import asyncio\n", + "import aiohttp\n", + "from aiohttp import ClientSession\n", + "from asyncio import Semaphore\n", + "from datetime import timedelta\n", + "from typing import Dict, List, Tuple\n", + "\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "async def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, max_data_points: int) -> List[tuple]:\n", + " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n", + "\n", + " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / pro_interval\n", + " \n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", + "\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " return total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches\n", + "\n", + "def generate_variable_batches(start_time, end_time, pro_interval, variable_list: List[Dict[str, str]], max_data_points) -> List:\n", + " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n", + "\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", + " max_variables_per_batch = generate_time_batches(start_time, end_time, pro_interval, max_data_points)[1]\n", + "\n", + " variable_batches = [\n", + " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n", + " ]\n", + "\n", + " return 
variable_batches\n", + "\n", + "def _prepare_body(\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " variable_list: List[Dict[str, str]], \n", + " agg_name: str,\n", + " ) -> Dict:\n", + " \"\"\"\n", + " Prepare the request body for the API call.\n", + " \"\"\"\n", + " total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches = generate_time_batches(\n", + " start_time, end_time, pro_interval, 10000)\n", + "\n", + " for time_batch in range(max_time_batches):\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " variable_batches = generate_variable_batches(variable_list)\n", + "\n", + " for variables in variable_batches:\n", + " body = {\n", + " **opc_data.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"ReadValueIds\": variables,\n", + " \"AggregateName\": agg_name\n", + " }\n", + " return body\n", + " \n", + "def process_batch(content: dict) -> pd.DataFrame:\n", + " \"\"\" Process individual batch of data \"\"\"\n", + " \n", + " df_list = []\n", + " for item in content[\"HistoryReadResults\"]:\n", + " df = pd.json_normalize(item[\"DataValues\"])\n", + " for key, value in item[\"NodeId\"].items():\n", + " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", + " df_list.append(df)\n", + " \n", + " if df_list:\n", + " df_result = pd.concat(df_list)\n", + " df_result.reset_index(inplace=True, drop=True)\n", + " return df_result\n", + " else:\n", + " return pd.DataFrame()\n", + " \n", + "async def make_async_api_request(opc_data, start_time:datetime, end_time:datetime,\n", + " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n", + " semaphore, max_retries: int = 3, retry_delay: int = 5) -> dict:\n", + " \n", + " \"\"\"Make API request for the given time range and variable list\"\"\"\n", + "\n", + " async with semaphore:\n", + " body = _prepare_body(\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " variable_list,\n", + " agg_name\n", + " )\n", + " for attempt in range(max_retries):\n", + " try:\n", + " async with ClientSession() as session:\n", + " async with session.post(\n", + " f\"{opcua_rest_url}values/historicalaggregated\",\n", + " json=body,\n", + " headers=opc_data.headers\n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()\n", + " break\n", + " except aiohttp.ClientError as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. 
Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " opc_data._check_content(content)\n", + "\n", + " df_result = process_batch(content)\n", + " return df_result\n", + " \n", + "async def process_api_response(opc_data, start_time:datetime, end_time:datetime,\n", + " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n", + " max_concurrent_requests: int = 10) -> pd.DataFrame:\n", + " \"\"\" Process API response asynchronously and return the result dataframe \"\"\"\n", + " all_results = []\n", + " semaphore = Semaphore(max_concurrent_requests)\n", + "\n", + " tasks = [\n", + " make_async_api_request(opc_data, start_time, end_time, pro_interval, variable_list, agg_name, semaphore)\n", + " ]\n", + " results = await asyncio.gather(*tasks)\n", + " all_results.extend(results)\n", + " \n", + " if all_results:\n", + " combined_df = pd.concat(all_results, ignore_index=True)\n", + " combined_df.reset_index(inplace=True, drop=True)\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " return opc_data._process_df(combined_df, columns)\n", + " else:\n", + " return pd.DataFrame()\n", + " \n", + "async def get_historical_aggregated_values_async(\n", + " opc_data,\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " variable_list: List[Dict[str, str]],\n", + " agg_name: str,\n", + ") -> pd.DataFrame:\n", + " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", + "\n", + " \n", + " result_df = await process_api_response(opc_data, start_time, end_time, pro_interval, variable_list, agg_name)\n", + "\n", + " return result_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical inverter data in asyncio process\n", + "one_days_historic_inverter_data2 = await get_historical_aggregated_values_batch_time_vars_async(\n", + " start_time=start_time,\n", + " end_time=end_time,\n", + " pro_interval=60*1000,\n", + " agg_name=\"Average\",\n", + " variable_list=string_sets.variables_as_list([\"DCPower\"])\n", + ")\n", + "one_days_historic_inverter_data2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batching with Async" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import asyncio\n", + "import aiohttp\n", + "from aiohttp import ClientSession\n", + "from asyncio import Semaphore\n", + "from datetime import timedelta\n", + "\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_historical_aggregated_values_batch_time_vars_async(\n", + " self, \n", + " start_time: datetime, \n", + " end_time: datetime, \n", + " pro_interval: int, \n", + " agg_name: str, \n", + " variable_list: list, \n", + " max_data_points: int = 10000, \n", + " max_retries: int = 3, \n", + " retry_delay: int = 5, \n", + " max_concurrent_requests: 
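# Sketch: worked example of the batch sizing used by generate_time_batches / generate_variable_batches
# above. start_time, end_time and pro_interval are the values from the Query Parameters cell;
# the 100-variable count is an assumed, illustrative figure.
import math

total_time_range_ms = (end_time - start_time).total_seconds() * 1000   # 86_340_000.0 ms
estimated_intervals = total_time_range_ms / pro_interval                # 1439.0 intervals of 60 s
max_variables_per_batch = max(1, int(10000 / estimated_intervals))      # 6 variables per request
max_time_batches = max(1, int(estimated_intervals / 10000))             # 1 time batch
n_variables = 100                                                        # illustrative only
n_requests = math.ceil(n_variables / max_variables_per_batch) * max_time_batches
print(max_variables_per_batch, max_time_batches, n_requests)             # 6 1 17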
int = 10\n", + ") -> pd.DataFrame:\n", + " \n", + " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", + "\n", + " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / pro_interval\n", + "\n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", + " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", + "\n", + " all_results = []\n", + " semaphore = Semaphore(max_concurrent_requests)\n", + "\n", + " async def process_batch(variables, time_batch):\n", + " async with semaphore:\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " body = {\n", + " **self.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"ReadValueIds\": variables,\n", + " \"AggregateName\": agg_name\n", + " }\n", + " print(body)\n", + "\n", + " for attempt in range(max_retries):\n", + " try:\n", + " async with ClientSession() as session:\n", + " async with session.post(\n", + " f\"{self.rest_url}values/historicalaggregated\",\n", + " json=body,\n", + " headers=self.headers\n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()\n", + " break\n", + " except aiohttp.ClientError as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. 
Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " self._check_content(content)\n", + "\n", + " df_list = []\n", + " for item in content[\"HistoryReadResults\"]:\n", + " df = pd.json_normalize(item[\"DataValues\"])\n", + " for key, value in item[\"NodeId\"].items():\n", + " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", + " df_list.append(df)\n", + " \n", + " if df_list:\n", + " df_result = pd.concat(df_list)\n", + " df_result.reset_index(inplace=True, drop=True)\n", + " return df_result\n", + "\n", + " tasks = [\n", + " process_batch(variables, time_batch)\n", + " for variables in variable_batches\n", + " for time_batch in range(max_time_batches)\n", + " ]\n", + "\n", + " results = await asyncio.gather(*tasks)\n", + " all_results.extend(results)\n", + "\n", + " logger.info(\"Combining all batches...\")\n", + " combined_df = pd.concat(all_results, ignore_index=True)\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " return self._process_df(combined_df, columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical data\n", + "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n", + " opc_data,\n", + " start_time=start_time,\n", + " end_time=end_time,\n", + " pro_interval=pro_interval,\n", + " agg_name=agg_name,\n", + " variable_list=variable_list,\n", + " max_data_points=10000,\n", + " max_concurrent_requests=40\n", + ")\n", + "one_day_historical_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batching with Async for Raw Historical Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict, List, Any, Union, Optional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_raw_historical_values_batch_time_vars_async(\n", + " self, \n", + " start_time: datetime, \n", + " end_time: datetime, \n", + " variable_list: list, \n", + " limit_start_index: Union[int, None] = None, \n", + " limit_num_records: Union[int, None] = None,\n", + " max_data_points: int = 10000, \n", + " max_retries: int = 3, \n", + " retry_delay: int = 5, \n", + " max_concurrent_requests: int = 10\n", + ") -> pd.DataFrame:\n", + " \n", + " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", + "\n", + " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / max_data_points\n", + "\n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " extended_variables = [{\"NodeId\": var} for var in variable_list]\n", + " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", + "\n", + " all_results = []\n", + " semaphore = 
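# Sketch: the cells above rely on the notebook's top-level await; this is how the same batched
# coroutine could be driven from a plain Python script instead. All names used here (opc_data,
# start_time, end_time, pro_interval, agg_name, variable_list) are defined earlier in this notebook.
import asyncio

def run_batched_query() -> pd.DataFrame:
    return asyncio.run(
        get_historical_aggregated_values_batch_time_vars_async(
            opc_data,                      # passed positionally as `self`, as in the call above
            start_time=start_time,
            end_time=end_time,
            pro_interval=pro_interval,
            agg_name=agg_name,
            variable_list=variable_list,
        )
    )

# df = run_batched_query()   # only from a script; inside the notebook keep using `await`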
Semaphore(max_concurrent_requests)\n", + "\n", + " async def process_batch(variables, time_batch):\n", + " async with semaphore:\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " body = {\n", + " **self.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ReadValueIds\": variables,\n", + " }\n", + " \n", + " if limit_start_index is not None and limit_num_records is not None:\n", + " body[\"Limit\"] = {\"StartIndex\": limit_start_index, \"NumRecords\": limit_num_records}\n", + "\n", + " for attempt in range(max_retries):\n", + " try:\n", + " async with ClientSession() as session:\n", + " async with session.post(\n", + " f\"{self.rest_url}values/historical\",\n", + " json=body,\n", + " headers=self.headers\n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()\n", + " break\n", + " except aiohttp.ClientError as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " self._check_content(content)\n", + "\n", + " df_list = []\n", + " for item in content[\"HistoryReadResults\"]:\n", + " df = pd.json_normalize(item[\"DataValues\"])\n", + " for key, value in item[\"NodeId\"].items():\n", + " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", + " df_list.append(df)\n", + " \n", + " if df_list:\n", + " df_result = pd.concat(df_list)\n", + " df_result.reset_index(inplace=True, drop=True)\n", + " return df_result\n", + "\n", + " tasks = [\n", + " process_batch(variables, time_batch)\n", + " for variables in variable_batches\n", + " for time_batch in range(max_time_batches)\n", + " ]\n", + "\n", + " results = await asyncio.gather(*tasks)\n", + " all_results.extend(results)\n", + "\n", + " logger.info(\"Combining all batches...\")\n", + " combined_df = pd.concat(all_results, ignore_index=True)\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " return self._process_df(combined_df, columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day raw historical data\n", + "one_day_raw_historical_data = await get_raw_historical_values_batch_time_vars_async(\n", + " opc_data,\n", + " start_time=start_time,\n", + " end_time=end_time,\n", + " variable_list=variable_list,\n", + " max_data_points=10000,\n", + " max_concurrent_requests=35\n", + ")\n", + "one_day_raw_historical_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Async with ClientPool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import aiohttp\n", + "from aiohttp import ClientSession\n", + "from asyncio 
import Semaphore\n", + "from typing import List, Dict, Any\n", + "from datetime import datetime, timedelta\n", + "import pandas as pd\n", + "import logging\n", + "from pydantic import AnyUrl, ValidationError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ClientPool:\n", + " def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]):\n", + " self.clients = asyncio.Queue()\n", + " for _ in range(num_clients):\n", + " self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers))\n", + " self.num_clients = num_clients\n", + "\n", + " async def get_client(self):\n", + " client = await self.clients.get()\n", + " return client\n", + "\n", + " async def release_client(self, client):\n", + " await self.clients.put(client)\n", + "\n", + " async def close_all(self):\n", + " while not self.clients.empty():\n", + " client = await self.clients.get()\n", + " await client.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def request_from_api_async(\n", + " client_pool: ClientPool,\n", + " method: str,\n", + " endpoint: str,\n", + " data: str = None,\n", + " params: Dict[str, Any] = None,\n", + " extended_timeout: bool = False,\n", + ") -> Dict[str, Any]:\n", + " timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)\n", + " client = await client_pool.get_client()\n", + " \n", + " try:\n", + " if method == \"GET\":\n", + " async with client.get(endpoint, params=params, timeout=timeout) as response:\n", + " response.raise_for_status()\n", + " if 'application/json' in response.headers.get('Content-Type', ''):\n", + " return await response.json()\n", + " else:\n", + " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n", + " elif method == \"POST\":\n", + " async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:\n", + " response.raise_for_status()\n", + " if 'application/json' in response.headers.get('Content-Type', ''):\n", + " return await response.json()\n", + " else:\n", + " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n", + " else:\n", + " raise ValidationError(\"Unsupported method\")\n", + " finally:\n", + " await client_pool.release_client(client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_historical_aggregated_values_batch_time_vars_async(\n", + " self,\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " agg_name: str,\n", + " variable_list: List[str],\n", + " max_data_points: int = 100000,\n", + " max_retries: int = 3,\n", + " retry_delay: int = 5,\n", + " max_concurrent_requests: int = 55\n", + ") -> pd.DataFrame:\n", + " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + " logger = logging.getLogger(__name__)\n", + "\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", + " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / pro_interval\n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", + " max_time_batches = max(1, 
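# Sketch of the ClientPool acquire/release/close lifecycle defined above, mirroring how this
# notebook itself constructs the pool (opcua_rest_url and opc_data.headers come from earlier
# cells). No HTTP request is sent.
async def _client_pool_demo():
    pool = ClientPool(num_clients=2, rest_url=opcua_rest_url, headers=opc_data.headers)
    try:
        client = await pool.get_client()       # borrow a ClientSession from the queue
        try:
            print(client.closed)               # False: the session is ready for requests
        finally:
            await pool.release_client(client)  # always hand the session back
    finally:
        await pool.close_all()                 # close every pooled session

# await _client_pool_demo()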
int(estimated_intervals / max_data_points))\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " all_results = []\n", + " semaphore = Semaphore(max_concurrent_requests)\n", + " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", + "\n", + " async def process_batch(variables, time_batch):\n", + " async with semaphore:\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " body = {\n", + " **self.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"ReadValueIds\": variables,\n", + " \"AggregateName\": agg_name\n", + " }\n", + "\n", + " for attempt in range(max_retries):\n", + " try:\n", + " content = await request_from_api_async(\n", + " client_pool,\n", + " method=\"POST\",\n", + " endpoint=f\"/values/historicalaggregated\",\n", + " data=json.dumps(body, default=self.json_serial),\n", + " extended_timeout=True\n", + " )\n", + " break\n", + " except (aiohttp.ClientError, ValidationError) as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " self._check_content(content)\n", + "\n", + " df_list = []\n", + " for item in content[\"HistoryReadResults\"]:\n", + " df = pd.json_normalize(item[\"DataValues\"])\n", + " for key, value in item[\"NodeId\"].items():\n", + " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", + " df_list.append(df)\n", + " \n", + " if df_list:\n", + " df_result = pd.concat(df_list)\n", + " df_result.reset_index(inplace=True, drop=True)\n", + " return df_result\n", + "\n", + " tasks = [\n", + " process_batch(variables, time_batch)\n", + " for variables in variable_batches\n", + " for time_batch in range(max_time_batches)\n", + " ]\n", + "\n", + " try:\n", + " results = await asyncio.gather(*tasks)\n", + " all_results.extend(results)\n", + "\n", + " logger.info(\"Combining all batches...\")\n", + " combined_df = pd.concat(all_results, ignore_index=True)\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " return self._process_df(combined_df, columns)\n", + " finally:\n", + " await client_pool.close_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical data\n", + "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n", + " opc_data,\n", + " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", + " 
end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", + " pro_interval=60*1000,\n", + " agg_name=\"Average\",\n", + " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n", + " max_data_points=10000,\n", + " max_concurrent_requests=100\n", + ")\n", + "one_day_historical_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Async with Data Handler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import aiohttp\n", + "import pandas as pd\n", + "import sqlite3\n", + "import tempfile\n", + "import os\n", + "import json\n", + "from asyncio import Semaphore\n", + "from typing import List, Dict, Any\n", + "from datetime import datetime, timedelta\n", + "import logging\n", + "import pyarrow as pa\n", + "import pyarrow.parquet as pq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class DataHandler:\n", + " def __init__(self, max_memory_rows=10000):\n", + " self.max_memory_rows = max_memory_rows\n", + " self.temp_dir = tempfile.mkdtemp()\n", + " self.db_path = os.path.join(self.temp_dir, 'temp_data.db')\n", + " self.conn = sqlite3.connect(self.db_path)\n", + " self.conn.execute('''CREATE TABLE IF NOT EXISTS temp_data\n", + " (id INTEGER PRIMARY KEY AUTOINCREMENT,\n", + " batch_id TEXT,\n", + " data TEXT)''')\n", + "\n", + " async def save_data(self, batch_id: str, data: pd.DataFrame):\n", + " if len(data) <= self.max_memory_rows:\n", + " # Store small datasets directly in SQLite\n", + " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n", + " (batch_id, data.to_json()))\n", + " else:\n", + " # Stream larger datasets to Parquet file\n", + " file_path = os.path.join(self.temp_dir, f\"batch_{batch_id}.parquet\")\n", + " table = pa.Table.from_pandas(data)\n", + " pq.write_table(table, file_path)\n", + " \n", + " # Store file path in SQLite\n", + " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n", + " (batch_id, file_path))\n", + " self.conn.commit()\n", + "\n", + " async def get_data(self, batch_id: str) -> pd.DataFrame:\n", + " cursor = self.conn.execute(\"SELECT data FROM temp_data WHERE batch_id = ?\", (batch_id,))\n", + " result = cursor.fetchone()\n", + " if result:\n", + " data = result[0]\n", + " if data.startswith('{'): # JSON data\n", + " return pd.read_json(data)\n", + " else: # File path\n", + " return pd.read_parquet(data)\n", + " return None\n", + "\n", + " def cleanup(self):\n", + " self.conn.close()\n", + " for file in os.listdir(self.temp_dir):\n", + " os.remove(os.path.join(self.temp_dir, file))\n", + " os.rmdir(self.temp_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_historical_aggregated_values_batch_time_vars_data_async(\n", + " self,\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " agg_name: str,\n", + " variable_list: List[str],\n", + " max_data_points: int = 1000,\n", + " max_retries: int = 3,\n", + " retry_delay: int = 5,\n", + " max_concurrent_requests: int = 10\n", + ") -> pd.DataFrame:\n", + " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + " logger = logging.getLogger(__name__)\n", + "\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", + " total_time_range_ms = (end_time - 
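# Round-trip sketch for the SQLite/Parquet DataHandler defined above: frames at or below
# max_memory_rows are stored as JSON in SQLite, larger ones are spilled to a Parquet file.
# The three-row demo frame is purely illustrative.
async def _data_handler_demo():
    handler = DataHandler(max_memory_rows=10)
    df_in = pd.DataFrame({"Value": [1.0, 2.0, 3.0], "StatusCode": [0, 0, 0]})
    await handler.save_data("demo_0", df_in)   # small frame -> stored inline as JSON
    df_out = await handler.get_data("demo_0")
    print(df_out.shape)                        # (3, 2)
    handler.cleanup()                          # remove the temporary database and directory

# await _data_handler_demo()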
start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / pro_interval\n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", + " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " all_results = []\n", + " semaphore = Semaphore(max_concurrent_requests)\n", + " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", + " data_handler = DataHandler()\n", + "\n", + " async def process_batch(vid, variables, time_batch):\n", + " async with semaphore:\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " body = {\n", + " **self.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"ReadValueIds\": variables,\n", + " \"AggregateName\": agg_name\n", + " }\n", + "\n", + " for attempt in range(max_retries):\n", + " try:\n", + " content = await request_from_api_async(\n", + " client_pool,\n", + " method=\"POST\",\n", + " endpoint=f\"/values/historicalaggregated\",\n", + " data=json.dumps(body, default=self.json_serial),\n", + " extended_timeout=True\n", + " )\n", + " break\n", + " except (aiohttp.ClientError, ValidationError) as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. 
Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " self._check_content(content)\n", + "\n", + " df_result = pd.json_normalize(\n", + " content, \n", + " record_path=['HistoryReadResults', 'DataValues'], \n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", + " ['HistoryReadResults', 'NodeId','Id'],\n", + " ['HistoryReadResults', 'NodeId','Namespace']]\n", + " )\n", + " batch_id = f\"{time_batch}_{vid}\"\n", + " await data_handler.save_data(batch_id, df_result)\n", + " return batch_id\n", + "\n", + " tasks = [\n", + " process_batch(vid,variables, time_batch)\n", + " for vid,variables in enumerate(variable_batches)\n", + " for time_batch in range(max_time_batches)\n", + " ]\n", + "\n", + " try:\n", + " batch_ids = await asyncio.gather(*tasks)\n", + " # for batch_id in batch_ids:\n", + " # df = await data_handler.get_data(batch_id)\n", + " # all_results.append(df)\n", + "\n", + " # logger.info(\"Combining all batches...\")\n", + " # combined_df = pd.concat(all_results, ignore_index=True)\n", + " # columns = {\n", + " # \"Value.Type\": \"ValueType\",\n", + " # \"Value.Body\": \"Value\",\n", + " # \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " # \"StatusCode.Code\": \"StatusCode\",\n", + " # \"SourceTimestamp\": \"Timestamp\",\n", + " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " # }\n", + " # return self._process_df(combined_df, columns)\n", + " finally:\n", + " await client_pool.close_all()\n", + " data_handler.cleanup()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical data\n", + "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async(\n", + " opc_data,\n", + " start_time=start_time,\n", + " end_time=end_time,\n", + " pro_interval=pro_interval,\n", + " agg_name=agg_name,\n", + " variable_list=variable_list,\n", + " max_data_points=20000,\n", + " max_concurrent_requests=50\n", + ")\n", + "one_day_historical_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Async with parquet data handler for large data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import aiohttp\n", + "import pandas as pd\n", + "import pyarrow as pa\n", + "import pyarrow.parquet as pq\n", + "from datetime import datetime, timedelta\n", + "import json\n", + "from typing import List, Dict, Any\n", + "import logging\n", + "from asyncio import Semaphore\n", + "from aiohttp import TCPConnector\n", + "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "from concurrent.futures import ThreadPoolExecutor\n", + "\n", + "import tracemalloc\n", + "tracemalloc.start()\n", + "\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class AsyncParquetWriter:\n", + " def __init__(self, filename):\n", + " self.filename = filename\n", + " self.writer = None\n", + " self.executor = ThreadPoolExecutor(max_workers=10)\n", + "\n", + " async def write(self, df):\n", + " loop = asyncio.get_running_loop()\n", + " table = pa.Table.from_pandas(df)\n", + " if self.writer is None:\n", + " self.writer = pq.ParquetWriter(self.filename, table.schema)\n", + " await loop.run_in_executor(self.executor, 
self.writer.write_table, table)\n", + "\n", + " async def close(self):\n", + " if self.writer:\n", + " loop = asyncio.get_running_loop()\n", + " await loop.run_in_executor(self.executor, self.writer.close)\n", + " self.writer = None\n", + "\n", + "class DataHandler:\n", + " def __init__(self, base_path):\n", + " self.base_path = base_path\n", + " self.writers = {}\n", + "\n", + " async def save_data(self, batch_id: str, data: pd.DataFrame):\n", + " if batch_id not in self.writers:\n", + " self.writers[batch_id] = AsyncParquetWriter(f\"{self.base_path}/batch_{batch_id}.parquet\")\n", + " await self.writers[batch_id].write(data)\n", + "\n", + " async def close_all(self):\n", + " for writer in self.writers.values():\n", + " await writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n", + " self,\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " agg_name: str,\n", + " variable_list: List[str],\n", + " max_data_points: int = 100000,\n", + " max_retries: int = 3,\n", + " retry_delay: int = 5,\n", + " max_concurrent_requests: int = 50\n", + ") -> pd.DataFrame:\n", + " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + " logger = logging.getLogger(__name__)\n", + "\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", + " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", + " estimated_intervals = total_time_range_ms / pro_interval\n", + " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", + " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", + " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", + " time_batch_size_ms = total_time_range_ms / max_time_batches\n", + "\n", + " all_results = []\n", + " semaphore = Semaphore(max_concurrent_requests)\n", + " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", + " data_handler = DataHandler(base_path=\"pqfiles\")\n", + "\n", + " async def process_batch(vid, variables, time_batch):\n", + " async with semaphore:\n", + " batch_start_ms = time_batch * time_batch_size_ms\n", + " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", + " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", + " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", + "\n", + " body = {\n", + " **self.body,\n", + " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"ReadValueIds\": variables,\n", + " \"AggregateName\": agg_name\n", + " }\n", + "\n", + " for attempt in range(max_retries):\n", + " try:\n", + " content = await request_from_api_async(\n", + " client_pool,\n", + " method=\"POST\",\n", + " endpoint=f\"/values/historicalaggregated\",\n", + " data=json.dumps(body, default=self.json_serial),\n", + " extended_timeout=True\n", + " )\n", + " break\n", + " except (aiohttp.ClientError, ValidationError) as e:\n", + " if attempt < max_retries - 1:\n", + " wait_time = retry_delay * (2 ** attempt)\n", + " logger.warning(f\"Request failed. 
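# Sketch: read the per-batch Parquet files written by the DataHandler above back into a single
# DataFrame and apply the notebook's usual column renames via opc_data._process_df. Assumes the
# "pqfiles" base path used when the handler is constructed.
import glob

def combine_parquet_batches(base_path: str = "pqfiles") -> pd.DataFrame:
    files = sorted(glob.glob(f"{base_path}/batch_*.parquet"))
    if not files:
        return pd.DataFrame()
    combined = pd.concat((pd.read_parquet(f) for f in files), ignore_index=True)
    columns = {
        "Value.Type": "ValueType",
        "Value.Body": "Value",
        "StatusCode.Symbol": "StatusSymbol",
        "StatusCode.Code": "StatusCode",
        "SourceTimestamp": "Timestamp",
        "HistoryReadResults.NodeId.IdType": "IdType",
        "HistoryReadResults.NodeId.Id": "Id",
        "HistoryReadResults.NodeId.Namespace": "Namespace",
    }
    return opc_data._process_df(combined, columns)

# combined_df = combine_parquet_batches()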
Retrying in {wait_time} seconds...\")\n", + " await asyncio.sleep(wait_time)\n", + " else:\n", + " logger.error(f\"Max retries reached. Error: {e}\")\n", + " raise RuntimeError(f'Error message {e}')\n", + "\n", + " self._check_content(content)\n", + "\n", + " df_result = pd.json_normalize(\n", + " content, \n", + " record_path=['HistoryReadResults', 'DataValues'], \n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", + " ['HistoryReadResults', 'NodeId','Id'],\n", + " ['HistoryReadResults', 'NodeId','Namespace']]\n", + " )\n", + " batch_id = f\"{time_batch}_{vid}\"\n", + " await data_handler.save_data(batch_id, df_result)\n", + " return batch_id\n", + "\n", + " tasks = [\n", + " process_batch(vid,variables, time_batch)\n", + " for vid,variables in enumerate(variable_batches)\n", + " for time_batch in range(max_time_batches)\n", + " ]\n", + "\n", + " try:\n", + " batch_ids = await asyncio.gather(*tasks)\n", + " # for batch_id in batch_ids:\n", + " # df = await data_handler.get_data(batch_id)\n", + " # all_results.append(df)\n", + "\n", + " # logger.info(\"Combining all batches...\")\n", + " # combined_df = pd.concat(all_results, ignore_index=True)\n", + " # columns = {\n", + " # \"Value.Type\": \"ValueType\",\n", + " # \"Value.Body\": \"Value\",\n", + " # \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " # \"StatusCode.Code\": \"StatusCode\",\n", + " # \"SourceTimestamp\": \"Timestamp\",\n", + " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " # }\n", + " # return self._process_df(combined_df, columns)\n", + " finally:\n", + " await client_pool.close_all()\n", + " await data_handler.close_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical data\n", + "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n", + " opc_data,\n", + " start_time=datetime(2024,6,1,00,00),\n", + " end_time=datetime(2024,6,2,00,00),\n", + " pro_interval=pro_interval,\n", + " agg_name=agg_name,\n", + " variable_list=variable_list,\n", + " max_data_points=50000,\n", + " max_concurrent_requests=50\n", + ")\n", + "one_day_historical_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stringset data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_historical_aggregated_values(opc_data,\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " agg_name, \n", + " variable_list\n", + ") -> pd.DataFrame:\n", + " \n", + " vars = opc_data._get_variable_list_as_list(variable_list)\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]\n", + "\n", + " body = {\n", + " **opc_data.body, \n", + " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", + " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", + " \"ProcessingInterval\": pro_interval, \n", + " \"AggregateName\": agg_name,\n", + " \"ReadValueIds\": extended_variables\n", + " }\n", + " print(body)\n", + "\n", + " content = request_from_api(\n", + " rest_url=opcua_rest_url, \n", + " method=\"POST\", \n", + " endpoint=\"values/historicalaggregated\", \n", + " data=json.dumps(body, default=opc_data.json_serial), \n", + " headers=opc_data.headers, \n", + " extended_timeout=True\n", + " )\n", + " print(content)\n", + " 
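    # Flatten the nested response: record_path walks into each HistoryReadResults entry's
    # DataValues list (one row per data value), while meta copies the owning NodeId fields
    # (IdType, Id, Namespace) onto every row so values can be traced back to their variable.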
df_result = pd.json_normalize(\n", + " content, \n", + " record_path=['HistoryReadResults', 'DataValues'], \n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']\n", + " ]\n", + " )\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " return opc_data._process_df(df_result, columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n", + "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n", + "pro_interval=600000\n", + "agg_name=\"Average\"\n", + "variable_list=string_sets.variables_as_list([\"DCPower\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_historical_aggregated_values(opc_data,\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " agg_name, \n", + " variable_list) -> pd.DataFrame:\n", + " vars = opc_data._get_variable_list_as_list(variable_list)\n", + " batch_size = 100\n", + " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n", + " \n", + " combined_df = pd.DataFrame() \n", + " \n", + " for batch in batches:\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n", + " \n", + " body = {\n", + " **opc_data.body, \n", + " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", + " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", + " \"ProcessingInterval\": pro_interval, \n", + " \"AggregateName\": agg_name,\n", + " \"ReadValueIds\": extended_variables\n", + " }\n", + " \n", + " content = request_from_api(\n", + " rest_url=opcua_rest_url, \n", + " method=\"POST\", \n", + " endpoint=\"values/historicalaggregated\", \n", + " data=json.dumps(body, default=opc_data.json_serial), \n", + " headers=opc_data.headers, \n", + " extended_timeout=True\n", + " )\n", + " \n", + " df_result = pd.json_normalize(\n", + " content, \n", + " record_path=['HistoryReadResults', 'DataValues'], \n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']]\n", + " )\n", + " \n", + " if combined_df.empty:\n", + " combined_df = df_result\n", + " else:\n", + " combined_df = pd.concat([combined_df, df_result], ignore_index=True)\n", + " \n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + " \n", + " return opc_data._process_df(combined_df, columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_historical_aggregated_values(opc_data,\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " agg_name, \n", + " 
variable_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "import concurrent.futures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_historical_aggregated_values(opc_data, start_time, end_time, pro_interval, agg_name, variable_list) -> pd.DataFrame:\n", + " vars = opc_data._get_variable_list_as_list(variable_list)\n", + " batch_size = 150\n", + " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n", + "\n", + " def process_batch(batch):\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n", + " body = {\n", + " **opc_data.body,\n", + " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", + " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"AggregateName\": agg_name,\n", + " \"ReadValueIds\": extended_variables\n", + " }\n", + " content = request_from_api(\n", + " rest_url=opcua_rest_url,\n", + " method=\"POST\",\n", + " endpoint=\"values/historicalaggregated\",\n", + " data=json.dumps(body, default=opc_data.json_serial),\n", + " headers=opc_data.headers,\n", + " extended_timeout=True\n", + " )\n", + " return pd.json_normalize(\n", + " content,\n", + " record_path=['HistoryReadResults', 'DataValues'],\n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId', 'Id'], ['HistoryReadResults', 'NodeId', 'Namespace']]\n", + " )\n", + "\n", + " dataframes = []\n", + " with concurrent.futures.ThreadPoolExecutor() as executor:\n", + " future_to_batch = {executor.submit(process_batch, batch): batch for batch in batches}\n", + " for future in concurrent.futures.as_completed(future_to_batch):\n", + " dataframes.append(future.result())\n", + "\n", + " combined_df = pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()\n", + "\n", + " columns = {\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", + " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " }\n", + "\n", + " return opc_data._process_df(combined_df, columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vars = opc_data._get_variable_list_as_list(variable_list)\n", + "extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "body = {\n", + " **opc_data.body,\n", + " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", + " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", + " \"ProcessingInterval\": pro_interval,\n", + " \"AggregateName\": agg_name,\n", + " \"ReadValueIds\": extended_variables\n", + "}\n", + "body" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_historical_aggregated_values(opc_data,\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " agg_name, \n", + " variable_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
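# Sketch of a wall-clock timing helper for comparing the request strategies defined in this
# notebook; the label string and the commented example call are illustrative.
import time

def time_call(label, fn, *args, **kwargs):
    t0 = time.perf_counter()
    result = fn(*args, **kwargs)
    elapsed = time.perf_counter() - t0
    print(f"{label}: {elapsed:.2f} s, {len(result)} rows")
    return result

# Example with the thread-pooled variant defined above:
# df = time_call("threaded batches", get_historical_aggregated_values,
#                opc_data, start_time, end_time, pro_interval, agg_name, variable_list)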
"source": [ + "start_time = datetime.now() - relativedelta(months=1)\n", + "end_time = datetime.now()\n", + "get_historical_aggregated_values(opc_data,\n", + " start_time, \n", + " end_time, \n", + " pro_interval, \n", + " agg_name, \n", + " variable_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# History data for 1 day, 10 min aggregate - stringsets\n", + "history_agg = opc_data.get_historical_aggregated_values(\n", + " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", + " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", + " pro_interval=600000,\n", + " agg_name=\"Average\",\n", + " variable_list=inverters.variables_as_list([\"DCPower\"]),\n", + ")\n", + "history_agg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import math\n", + "from pydantic import BaseModel, AnyUrl\n", + "from datetime import timedelta\n", + "import asyncio\n", + "import aiohttp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Variables(BaseModel):\n", + " \"\"\"Helper class to parse all values api's.\n", + " Variables are described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.1/\n", + "\n", + " Variables:\n", + " Id: str - Id of the signal, e.g. SSO.EG-AS.WeatherSymbol\n", + " Namespace: int - Namespace on the signal, e.g. 2.\n", + " IdType: int - IdTypes described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.3/.\n", + " \"\"\"\n", + " Id: str\n", + " Namespace: int\n", + " IdType: int" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n", + " \"\"\"Make API request for the given time range and variable list\"\"\"\n", + "\n", + " # Creating a new variable list to remove pydantic models\n", + " vars = opc_data._get_variable_list_as_list(variable_list)\n", + "\n", + " extended_variables = [\n", + " {\n", + " \"NodeId\": var,\n", + " \"AggregateName\": agg_name,\n", + " }\n", + " for var in vars\n", + " ]\n", + " body = copy.deepcopy(opc_data.body)\n", + " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"ProcessingInterval\"] = pro_interval\n", + " body[\"ReadValueIds\"] = extended_variables\n", + " body[\"AggregateName\"] = agg_name\n", + "\n", + " # Make API request using aiohttp session\n", + " async with aiohttp.ClientSession() as session:\n", + " async with session.post(\n", + " f\"{opcua_rest_url}values/historicalaggregated\",\n", + " data=json.dumps(body, default=opc_data.json_serial),\n", + " headers=opc_data.headers,\n", + " timeout=aiohttp.ClientTimeout(total=None) \n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()\n", + "\n", + " return content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vars = opc_data._get_variable_list_as_list(variable_list)\n", + "vars1 = vars[0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "extended_variables = [\n", + " {\n", + " \"NodeId\": var,\n", + " \"AggregateName\": 
agg_name,\n", + " }\n", + " for var in vars1\n", + "]\n", + "len(extended_variables)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "body = copy.deepcopy(opc_data.body)\n", + "body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + "body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + "body[\"ProcessingInterval\"] = pro_interval\n", + "body[\"ReadValueIds\"] = extended_variables\n", + "body[\"AggregateName\"] = agg_name\n", + "body" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f\"{opcua_rest_url}values/historicalaggregated\"," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data=json.dumps(body, default=opc_data.json_serial)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_dict = json.loads(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read_value_ids = data_dict['ReadValueIds']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(read_value_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "headers=opc_data.headers\n", + "headers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "timeout=aiohttp.ClientTimeout(total=None) \n", + "timeout" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async with aiohttp.ClientSession() as session:\n", + " async with session.post(\n", + " f\"{opcua_rest_url}values/historicalaggregated\",\n", + " data=json.dumps(body, default=opc_data.json_serial),\n", + " headers=opc_data.headers,\n", + " timeout=aiohttp.ClientTimeout(total=None) \n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, batch_size: int) -> list[tuple]:\n", + " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n", + "\n", + " total_time_range = end_time - start_time\n", + " pro_interval_seconds = (pro_interval / 1000)\n", + " total_data_points = (total_time_range.total_seconds() // pro_interval_seconds) + 1\n", + "\n", + " total_batches = math.ceil(total_data_points / batch_size)\n", + " actual_batch_size = math.ceil(total_data_points / total_batches)\n", + "\n", + " time_batches = [\n", + " (start_time + timedelta(seconds=(i * actual_batch_size * pro_interval_seconds)),\n", + " start_time + timedelta(seconds=((i + 1) * actual_batch_size * pro_interval_seconds)) - timedelta(seconds=pro_interval_seconds))\n", + " for i in range(total_batches)\n", + " ]\n", + "\n", + " return time_batches" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
generate_variable_batches(variable_list: list[Variables], batch_size: int) -> list[list[Variables]]:\n", + " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n", + "\n", + " variable_batches = [\n", + " variable_list[i:i + batch_size] for i in range(0, len(variable_list), batch_size)\n", + " ]\n", + "\n", + " return variable_batches" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def process_api_response(opc_data, response: dict) -> pd.DataFrame:\n", + " \"\"\"Process the API response and return the result dataframe\"\"\"\n", + " \n", + " df_result = pd.json_normalize(response, record_path=['HistoryReadResults', 'DataValues'], \n", + " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],\n", + " ['HistoryReadResults', 'NodeId','Namespace']] )\n", + "\n", + " for i, row in df_result.iterrows():\n", + " if not math.isnan(row[\"Value.Type\"]):\n", + " value_type = opc_data._get_value_type(int(row[\"Value.Type\"])).get(\"type\")\n", + " df_result.at[i, \"Value.Type\"] = str(value_type)\n", + "\n", + " df_result.rename(\n", + " columns={\n", + " \"Value.Type\": \"ValueType\",\n", + " \"Value.Body\": \"Value\",\n", + " \"StatusCode.Symbol\": \"StatusSymbol\",\n", + " \"StatusCode.Code\": \"StatusCode\",\n", + " \"SourceTimestamp\": \"Timestamp\",\n", + " \"HistoryReadResults.NodeId.IdType\": \"Id\",\n", + " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", + " },\n", + " errors=\"raise\",\n", + " inplace=True,\n", + " )\n", + "\n", + " return df_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def get_historical_aggregated_values_async(\n", + " opc_data,\n", + " start_time: datetime,\n", + " end_time: datetime,\n", + " pro_interval: int,\n", + " agg_name: str,\n", + " variable_list: list[Variables],\n", + " batch_size: int = 1000\n", + ") -> pd.DataFrame:\n", + " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", + "\n", + " \n", + " time_batches = generate_time_batches(start_time, end_time, pro_interval, batch_size)\n", + " variable_batches = generate_variable_batches(variable_list, batch_size)\n", + "\n", + " # Creating tasks for each API request and gathering the results\n", + " tasks = []\n", + "\n", + " for time_batch_start, time_batch_end in time_batches:\n", + " for variable_sublist in variable_batches:\n", + " task = asyncio.create_task(\n", + " make_async_api_request(opc_data, time_batch_start, time_batch_end, pro_interval, agg_name, variable_sublist)\n", + " ) \n", + " tasks.append(task)\n", + " \n", + " # Execute all tasks concurrently and gather their results\n", + " responses = await asyncio.gather(*tasks)\n", + " \n", + " # Processing the API responses\n", + " result_list = []\n", + " for idx, batch_response in enumerate(responses):\n", + " \n", + " batch_result = process_api_response(opc_data, batch_response)\n", + " result_list.append(batch_result)\n", + " \n", + " result_df = pd.concat(result_list, ignore_index=True)\n", + "\n", + " return result_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical inverter data in asyncio process\n", + "one_days_historic_inverter_data2 = await get_historical_aggregated_values_async(\n", + " opc_data,\n", + " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", + " 
end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", + " pro_interval=60*1000,\n", + " agg_name=\"Average\",\n", + " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n", + " batch_size=100\n", + ")\n", + "one_days_historic_inverter_data2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def generate_time_chunks(start_time: datetime, end_time: datetime):\n", + " \"\"\"Generate time chunks between start_time and end_time, each chunk_duration_minutes long.\"\"\"\n", + " delta = timedelta(minutes=60)\n", + " current_time = start_time\n", + " while current_time < end_time:\n", + " chunk_end_time = min(current_time + delta, end_time)\n", + " yield (current_time, chunk_end_time)\n", + " current_time = chunk_end_time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables], max_data_points=500) -> dict:\n", + " \"\"\"Make API request for the given time range and variable list, with additional chunking based on data points.\"\"\"\n", + "\n", + " def chunk_list(lst, n):\n", + " \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n", + " for i in range(0, len(lst), n):\n", + " yield lst[i:i + n]\n", + "\n", + " async def fetch_data_for_time_period(session, vars_chunk, start, end):\n", + " \"\"\"Fetch data for a given time period and chunk of variables.\"\"\"\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n", + " body = copy.deepcopy(opc_data.body)\n", + " body[\"StartTime\"] = start.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"EndTime\"] = end.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"ProcessingInterval\"] = pro_interval\n", + " body[\"ReadValueIds\"] = extended_variables\n", + " body[\"AggregateName\"] = agg_name\n", + "\n", + " async with session.post(\n", + " f\"{opcua_rest_url}values/historicalaggregated\",\n", + " data=json.dumps(body, default=str),\n", + " headers=opc_data.headers,\n", + " timeout=aiohttp.ClientTimeout(total=None)\n", + " ) as response:\n", + " response.raise_for_status()\n", + " return await response.json()\n", + "\n", + " # Creating a new variable list to remove pydantic models\n", + " vars = opc_data._get_variable_list_as_list(variable_list)\n", + " chunk_size = 5 # Chunk size for node IDs\n", + " vars_chunks = list(chunk_list(vars, chunk_size))\n", + "\n", + " all_responses = []\n", + " async with aiohttp.ClientSession() as session:\n", + " for vars_chunk in vars_chunks:\n", + " # Generate time chunks for the given time period\n", + " async for start, end in generate_time_chunks(start_time, end_time):\n", + " content = await fetch_data_for_time_period(session, vars_chunk, start, end)\n", + " all_responses.append(content)\n", + " return all_responses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n", + " \"\"\"Make API request for the given time range and variable list\"\"\"\n", + "\n", + " def chunk_list(lst, n):\n", + " for i in range(0, len(lst), n):\n", + " yield lst[i:i + n]\n", + "\n", + " # Creating a new variable list to remove pydantic models\n", + " vars = 
opc_data._get_variable_list_as_list(variable_list)\n", + "\n", + " chunk_size = 150 \n", + " vars_chunks = list(chunk_list(vars, chunk_size))\n", + "\n", + " all_responses = []\n", + " async with aiohttp.ClientSession() as session:\n", + " for vars_chunk in vars_chunks:\n", + " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n", + " body = copy.deepcopy(opc_data.body)\n", + " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " body[\"ProcessingInterval\"] = pro_interval\n", + " body[\"ReadValueIds\"] = extended_variables\n", + " body[\"AggregateName\"] = agg_name\n", + "\n", + " async with session.post(\n", + " f\"{opcua_rest_url}values/historicalaggregated\",\n", + " data=json.dumps(body, default=str),\n", + " headers=opc_data.headers,\n", + " timeout=aiohttp.ClientTimeout(total=None)\n", + " ) as response:\n", + " response.raise_for_status()\n", + " content = await response.json()\n", + " all_responses.append(content) \n", + "\n", + " return all_responses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "from typing import List, Tuple" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_time_chunks(start_time: datetime, end_time: datetime, interval_hours: int) -> List[Tuple[datetime, datetime]]:\n", + " \"\"\"Generate time chunks within the given start and end time with specified interval in hours.\"\"\"\n", + " delta = timedelta(hours=interval_hours)\n", + " current_time = start_time\n", + " chunks = []\n", + "\n", + " while current_time < end_time:\n", + " chunk_end_time = min(current_time + delta, end_time) \n", + " chunks.append((current_time, chunk_end_time))\n", + " current_time += delta\n", + "\n", + " return chunks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1 day aggregated historical inverter data in asyncio process\n", + "one_days_historic_inverter_data2 = await make_async_api_request(\n", + " opc_data,\n", + " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", + " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", + " pro_interval=60*1000,\n", + " agg_name=\"Average\",\n", + " variable_list=string_sets.variables_as_list([\"DCPower\"])\n", + ")\n", + "one_days_historic_inverter_data2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.12.1 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "6b866f0bc560289bf4bb2415ae9074243764eb008c10d00a1da29433677418de" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/api_performance_testingapi_performance_testing.ipynb b/notebooks/api_performance_testingapi_performance_testing.ipynb deleted file mode 100644 index a2b9fa8..0000000 --- a/notebooks/api_performance_testingapi_performance_testing.ipynb +++ /dev/null @@ -1,3829 +0,0 @@ -{ - 
"cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook explores both model index and opc ua scripts and contain examples of all the functions to make request to model index api and opc ua api servers. " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import Libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Import the required packeages\n", - "import pandas as pd\n", - "import os\n", - "import json\n", - "import datetime\n", - "import concurrent.futures\n", - "from dotenv import load_dotenv\n", - "from pathlib import Path\n", - "from dateutil.relativedelta import relativedelta" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import Scripts" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Import model index functions\n", - "from pyprediktormapclient.model_index import ModelIndex\n", - "\n", - "# Import OPC UA functions\n", - "from pyprediktormapclient.opc_ua import OPC_UA\n", - "\n", - "# Import Analytics Helper\n", - "from pyprediktormapclient.analytics_helper import AnalyticsHelper\n", - "\n", - "# Import \"Dataframer\" Tools\n", - "from pyprediktormapclient.shared import *\n", - "\n", - "# import AUTH_CLIENT\n", - "from pyprediktormapclient.auth_client import AUTH_CLIENT" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Consider obtaining the envrionment variables from .env file if you are running this locally from source.\n", - "dotenv_path = Path(\".env\")\n", - "load_dotenv(dotenv_path=dotenv_path)\n", - "\n", - "username = os.environ[\"USERNAME\"]\n", - "password = os.environ[\"PASSWORD\"]\n", - "opcua_rest_url = os.environ[\"OPC_UA_REST_URL\"]\n", - "opcua_server_url = os.environ[\"OPC_UA_SERVER_URL\"]\n", - "model_index_url = os.environ[\"MODEL_INDEX_URL\"]\n", - "ory_url = os.environ[\"ORY_URL\"]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Getting ory bearer token\n", - "auth_client = AUTH_CLIENT(rest_url=ory_url, username=username, password=password)\n", - "auth_client.request_new_ory_token()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Connecting to ModelIndex APIs \n", - "model = ModelIndex(url=model_index_url, auth_client=auth_client, session=auth_client.session)\n", - "\n", - "# Listed sites on the model index api server\n", - "namespaces = model.get_namespace_array()\n", - "# Types of Objects\n", - "object_types_json = model.get_object_types()\n", - "object_types = AnalyticsHelper(object_types_json)\n", - "namespace_list = object_types.namespaces_as_list(namespaces)\n", - "\n", - "# Initate the OPC UA API with a fixed namespace list\n", - "opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download data from modelindex api" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - " Id Name\n", - "0 5:0:1061 EquipmentEventType\n", - "1 5:0:1128 EnergyAndPowerMeterEventType\n", - "2 5:0:1263 EnergyAndPowerMeterCommLossEventType\n", - "3 5:0:1266 EnergyAndPowerMeterErrorEventType\n", - "4 5:0:1269 EnergyAndPowerMeterWarningEventType\n", - ".. ... ...\n", - "106 5:0:1013 GridType\n", - "107 5:0:1011 SectionType\n", - "108 5:0:1009 SiteType\n", - "109 5:0:1010 SubSiteType\n", - "110 5:0:1012 SubstationType\n", - "\n", - "[111 rows x 2 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Unique types of Objects\n", - "object_types_unique = object_types.dataframe[[\"Id\", \"Name\"]].drop_duplicates()\n", - "object_types_unique" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['EG-AS']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# To get the objects of a type\n", - "sites_json = model.get_objects_of_type(\"SiteType\")\n", - "\n", - "# Send the returned JSON into a normalizer to get Id, Type, Name, Props and Vars as columns\n", - "sites = AnalyticsHelper(sites_json)\n", - "sites.list_of_names()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - " Id Type Name \\\n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - ".. ... ... ... \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n", - "\n", - " VariableId \\\n", - "0 3:1:SSO.EG-AS.Alarms.CommLossPlantDevice \n", - "0 3:1:SSO.EG-AS.Signals.PPC.IsCurtailment \n", - "0 3:1:SSO.EG-AS.Signals.State.IsDay \n", - "0 3:1:SSO.EG-AS.Parameters.ContractDuration \n", - "0 3:1:SSO.EG-AS.Parameters.RegionKey \n", - ".. ... \n", - "0 3:1:SSO.EG-AS.Signals.PPC.SetpointActivePower \n", - "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationDiffu... \n", - "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationHoriz... \n", - "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationInCline \n", - "0 3:1:SSO.EG-AS.Signals.Status \n", - "\n", - " VariableName \\\n", - "0 CommLossPlantDevice \n", - "0 PPC.IsCurtailment \n", - "0 State.IsDay \n", - "0 ContractDuration \n", - "0 RegionKey \n", - ".. ... \n", - "0 PPC.SetpointActivePower \n", - "0 Weather.IrradiationDiffuseHorizontal \n", - "0 Weather.IrradiationHorizontal \n", - "0 Weather.IrradiationInCline \n", - "0 Status \n", - "\n", - " VariableIdSplit \n", - "0 {'Id': 'SSO.EG-AS.Alarms.CommLossPlantDevice',... \n", - "0 {'Id': 'SSO.EG-AS.Signals.PPC.IsCurtailment', ... \n", - "0 {'Id': 'SSO.EG-AS.Signals.State.IsDay', 'Names... \n", - "0 {'Id': 'SSO.EG-AS.Parameters.ContractDuration'... \n", - "0 {'Id': 'SSO.EG-AS.Parameters.RegionKey', 'Name... \n", - ".. ... \n", - "0 {'Id': 'SSO.EG-AS.Signals.PPC.SetpointActivePo... \n", - "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationD... \n", - "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationH... \n", - "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationI... \n", - "0 {'Id': 'SSO.EG-AS.Signals.Status', 'Namespace'... \n", - "\n", - "[118 rows x 6 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Analytics helper\n", - "sites.variables_as_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['3:1:SSO.EG-AS']" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sites.list_of_ids()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'3:1:SSO.EG-AS'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Selecting the second site\n", - "first_site_id = sites.list_of_ids()[0]\n", - "# first_site_id = '14:1:BE.DK-ADU'\n", - "first_site_id" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - " Id Name \\\n", - "0 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1 EG-AS-TS01-I01-SM10-CH1 \n", - "1 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH10 EG-AS-TS01-I01-SM10-CH10 \n", - "2 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH2 EG-AS-TS01-I01-SM10-CH2 \n", - "3 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH3 EG-AS-TS01-I01-SM10-CH3 \n", - "4 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH4 EG-AS-TS01-I01-SM10-CH4 \n", - "... ... ... \n", - "2933 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH5 EG-AS-TS11-I22-SM9-CH5 \n", - "2934 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH6 EG-AS-TS11-I22-SM9-CH6 \n", - "2935 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH7 EG-AS-TS11-I22-SM9-CH7 \n", - "2936 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH8 EG-AS-TS11-I22-SM9-CH8 \n", - "2937 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9 EG-AS-TS11-I22-SM9-CH9 \n", - "\n", - " Type Props \\\n", - "0 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '1'}, {... \n", - "1 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '10'}, ... \n", - "2 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '2'}, {... \n", - "3 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '3'}, {... \n", - "4 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '4'}, {... \n", - "... ... ... \n", - "2933 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '5'}, {... \n", - "2934 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '6'}, {... \n", - "2935 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '7'}, {... \n", - "2936 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '8'}, {... \n", - "2937 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '9'}, {... \n", - "\n", - " Vars \n", - "0 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "1 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "2 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "3 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "4 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "... ... \n", - "2933 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "2934 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "2935 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "2936 [{'DisplayName': 'StringDisconnected', 'Id': '... \n", - "2937 [{'DisplayName': 'StringDisconnected', 'Id': '... 
\n", - "\n", - "[2938 rows x 5 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get all stringsets for one park\n", - "string_sets_for_first_park_as_json = model.get_object_descendants(\n", - " \"StringSetType\", [first_site_id], \"PV_Assets\"\n", - ")\n", - "string_sets = AnalyticsHelper(string_sets_for_first_park_as_json)\n", - "string_sets.dataframe" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "variable_list =string_sets.variables_as_list([\"DCPower\"])\n", - "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n", - "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n", - "pro_interval=60*1000\n", - "agg_name=\"Average\"" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "from datetime import timedelta" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "import asyncio\n", - "import aiohttp\n", - "from aiohttp import ClientSession\n", - "from asyncio import Semaphore\n", - "\n", - "async def get_historical_aggregated_values_batch_time_vars_async(self, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list, max_data_points: int = 10000, max_retries: int = 3, retry_delay: int = 5, max_concurrent_requests: int = 10) -> pd.DataFrame:\n", - " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - " logger = logging.getLogger(__name__)\n", - " # Convert variable list to the required format\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - "\n", - " # Calculate total time range in milliseconds\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - "\n", - " # Estimate the number of intervals based on the processing interval\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - "\n", - " # Calculate the maximum number of variables that can be processed in each batch\n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - "\n", - " # Split variables into batches\n", - " variable_batches = [\n", - " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", - "\n", - " # Calculate the number of time batches needed based on max data points and estimated intervals\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - "\n", - " # Calculate time batch size in milliseconds\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - "\n", - " async def process_batch(variables, time_batch):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " 
\"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " async with ClientSession() as session:\n", - " async with session.post(\n", - " f\"{self.rest_url}values/historicalaggregated\",\n", - " json=body,\n", - " headers=self.headers\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " break\n", - " except aiohttp.ClientError as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " self._check_content(content)\n", - "\n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", - " ['HistoryReadResults', 'NodeId','Id'],\n", - " ['HistoryReadResults', 'NodeId','Namespace']]\n", - " )\n", - " return df_result\n", - "\n", - " tasks = [\n", - " process_batch(variables, time_batch)\n", - " for variables in variable_batches\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " results = await asyncio.gather(*tasks)\n", - " all_results.extend(results)\n", - "\n", - " # Combine all batch results into a single DataFrame\n", - " logger.info(\"Combining all batches...\")\n", - " combined_df = pd.concat(all_results, ignore_index=True)\n", - " # Process and return the combined DataFrame\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return self._process_df(combined_df, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-07-22 14:10:51,694 - INFO - Combining all batches...\n" - ] - }, - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - " Timestamp ValueType Value StatusCode \\\n", - "0 2024-06-22T14:07:19.691118Z Double 13861.390625 1 \n", - "1 2024-06-22T14:08:19.691118Z Double 13998.080078 1 \n", - "2 2024-06-22T14:09:19.691118Z Double 13927.273438 1 \n", - "3 2024-06-22T14:10:19.691118Z Double 13916.458984 1 \n", - "4 2024-06-22T14:11:19.691118Z Double 13997.431641 1 \n", - "... ... ... ... ... \n", - "4230715 2024-06-23T14:02:19.691118Z Double 13705.159405 1 \n", - "4230716 2024-06-23T14:03:19.691118Z Double 13593.904297 1 \n", - "4230717 2024-06-23T14:04:19.691118Z Double 13629.435547 1 \n", - "4230718 2024-06-23T14:05:19.691118Z Double 13530.140625 1 \n", - "4230719 2024-06-23T14:06:19.691118Z Double 13501.579102 1 \n", - "\n", - " StatusSymbol IdType \\\n", - "0 Good 1 \n", - "1 Good 1 \n", - "2 Good 1 \n", - "3 Good 1 \n", - "4 Good 1 \n", - "... ... ... \n", - "4230715 Good 1 \n", - "4230716 Good 1 \n", - "4230717 Good 1 \n", - "4230718 Good 1 \n", - "4230719 Good 1 \n", - "\n", - " Id Namespace \n", - "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "... ... ... \n", - "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "\n", - "[4230720 rows x 8 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n", - " opc_data,\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=10000,\n", - " max_concurrent_requests=40\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Batching with Async Refactoring" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import asyncio\n", - "import aiohttp\n", - "from aiohttp import ClientSession\n", - "from asyncio import Semaphore\n", - "from datetime import timedelta\n", - "from typing import Dict, List, Tuple\n", - "\n", - "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - "logger = logging.getLogger(__name__)\n", - "\n", - "async def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, max_data_points: int) -> List[tuple]:\n", - " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n", - "\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - " \n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - "\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " return total_time_range_ms, 
max_variables_per_batch, time_batch_size_ms, max_time_batches\n", - "\n", - "def generate_variable_batches(start_time, end_time, pro_interval, variable_list: List[Dict[str, str]], max_data_points) -> List:\n", - " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n", - "\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - " max_variables_per_batch = generate_time_batches(start_time, end_time, pro_interval, max_data_points)[1]\n", - "\n", - " variable_batches = [\n", - " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n", - " ]\n", - "\n", - " return variable_batches\n", - "\n", - "def _prepare_body(\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " variable_list: List[Dict[str, str]], \n", - " agg_name: str,\n", - " ) -> Dict:\n", - " \"\"\"\n", - " Prepare the request body for the API call.\n", - " \"\"\"\n", - " total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches = generate_time_batches(\n", - " start_time, end_time, pro_interval, 10000)\n", - "\n", - " for time_batch in range(max_time_batches):\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " variable_batches = generate_variable_batches(variable_list)\n", - "\n", - " for variables in variable_batches:\n", - " body = {\n", - " **opc_data.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - " return body\n", - " \n", - "def process_batch(content: dict) -> pd.DataFrame:\n", - " \"\"\" Process individual batch of data \"\"\"\n", - " \n", - " df_list = []\n", - " for item in content[\"HistoryReadResults\"]:\n", - " df = pd.json_normalize(item[\"DataValues\"])\n", - " for key, value in item[\"NodeId\"].items():\n", - " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", - " df_list.append(df)\n", - " \n", - " if df_list:\n", - " df_result = pd.concat(df_list)\n", - " df_result.reset_index(inplace=True, drop=True)\n", - " return df_result\n", - " else:\n", - " return pd.DataFrame()\n", - " \n", - "async def make_async_api_request(opc_data, start_time:datetime, end_time:datetime,\n", - " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n", - " semaphore, max_retries: int = 3, retry_delay: int = 5) -> dict:\n", - " \n", - " \"\"\"Make API request for the given time range and variable list\"\"\"\n", - "\n", - " async with semaphore:\n", - " body = _prepare_body(\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " variable_list,\n", - " agg_name\n", - " )\n", - " for attempt in range(max_retries):\n", - " try:\n", - " async with ClientSession() as session:\n", - " async with session.post(\n", - " f\"{opcua_rest_url}values/historicalaggregated\",\n", - " json=body,\n", - " headers=opc_data.headers\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " break\n", - " except aiohttp.ClientError as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = 
retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " opc_data._check_content(content)\n", - "\n", - " df_result = process_batch(content)\n", - " return df_result\n", - " \n", - "async def process_api_response(opc_data, start_time:datetime, end_time:datetime,\n", - " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n", - " max_concurrent_requests: int = 10) -> pd.DataFrame:\n", - " \"\"\" Process API response asynchronously and return the result dataframe \"\"\"\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - "\n", - " tasks = [\n", - " make_async_api_request(opc_data, start_time, end_time, pro_interval, variable_list, agg_name, semaphore)\n", - " ]\n", - " results = await asyncio.gather(*tasks)\n", - " all_results.extend(results)\n", - " \n", - " if all_results:\n", - " combined_df = pd.concat(all_results, ignore_index=True)\n", - " combined_df.reset_index(inplace=True, drop=True)\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return opc_data._process_df(combined_df, columns)\n", - " else:\n", - " return pd.DataFrame()\n", - " \n", - "async def get_historical_aggregated_values_async(\n", - " opc_data,\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " variable_list: List[Dict[str, str]],\n", - " agg_name: str,\n", - ") -> pd.DataFrame:\n", - " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", - "\n", - " \n", - " result_df = await process_api_response(opc_data, start_time, end_time, pro_interval, variable_list, agg_name)\n", - "\n", - " return result_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical inverter data in asyncio process\n", - "one_days_historic_inverter_data2 = await get_historical_aggregated_values_batch_time_vars_async(\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=60*1000,\n", - " agg_name=\"Average\",\n", - " variable_list=string_sets.variables_as_list([\"DCPower\"])\n", - ")\n", - "one_days_historic_inverter_data2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Batching with Async" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import asyncio\n", - "import aiohttp\n", - "from aiohttp import ClientSession\n", - "from asyncio import Semaphore\n", - "from datetime import timedelta\n", - "from typing import List, Dict, Tuple\n", - "\n", - "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - "logger = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def _fetch_data(self, endpoint: str, body: Dict, max_retries:int, retry_delay:int) -> pd.DataFrame:\n", 
- " \"\"\"\n", - " Fetch data from the API and return it as a DataFrame.\n", - " \"\"\"\n", - " for attempt in range(max_retries):\n", - " try:\n", - " async with ClientSession() as session:\n", - " async with session.post(\n", - " rest_url=self.rest_url,\n", - " endpoint=endpoint,\n", - " json=body,\n", - " headers=opc_data.headers\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " except aiohttp.ClientError as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - " opc_data._check_content(content)\n", - "\n", - " df_list = []\n", - " for item in content[\"HistoryReadResults\"]:\n", - " df = pd.json_normalize(item[\"DataValues\"])\n", - " for key, value in item[\"NodeId\"].items():\n", - " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", - " df_list.append(df)\n", - " \n", - " if df_list:\n", - " df_result = pd.concat(df_list)\n", - " df_result.reset_index(inplace=True, drop=True)\n", - " return df_result\n", - " \n", - " return df_result" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "async def process_batch(self, semaphore, variables, time_batch, time_batch_size_ms, total_time_range_ms, max_retries, retry_delay):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " async with ClientSession() as session:\n", - " async with session.post(\n", - " f\"{self.rest_url}values/historicalaggregated\",\n", - " json=body,\n", - " headers=opc_data.headers\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " break\n", - " except aiohttp.ClientError as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. 
Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " opc_data._check_content(content)\n", - "\n", - " df_list = []\n", - " for item in content[\"HistoryReadResults\"]:\n", - " df = pd.json_normalize(item[\"DataValues\"])\n", - " for key, value in item[\"NodeId\"].items():\n", - " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n", - " df_list.append(df)\n", - " \n", - " if df_list:\n", - " df_result = pd.concat(df_list)\n", - " df_result.reset_index(inplace=True, drop=True)\n", - " return df_result\n", - " \n", - " return df_result" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "async def get_historical_aggregated_values_batch_time_vars_async1(self, start_time: datetime, end_time: datetime, pro_interval: int, \n", - " agg_name: str, variable_list: list,max_data_points: int = 10000,\n", - " max_retries: int = 3, retry_delay: int = 5, \n", - " max_concurrent_requests: int = 10) -> pd.DataFrame:\n", - " \n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - " \n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - " variable_batches = [\n", - " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n", - " ]\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - "\n", - " tasks = [\n", - " process_batch(self,semaphore, variables, time_batch, time_batch_size_ms, total_time_range_ms, max_retries, retry_delay)\n", - " for variables in variable_batches\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " results = await asyncio.gather(*tasks)\n", - " all_results.extend(results)\n", - "\n", - " logger.info(\"Combining all batches...\")\n", - " combined_df = pd.concat(all_results, ignore_index=True)\n", - " \n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return self._process_df(combined_df, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-07-22 12:50:34,233 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-22 12:51:01,813 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-22 12:51:51,706 - INFO - Combining all batches...\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TimestampValueTypeValueStatusCodeStatusSymbolIdTypeIdNamespace
02024-06-22T12:36:10.687988ZDouble17479.7636721Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
12024-06-22T12:37:10.687988ZDouble17516.1699221Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
22024-06-22T12:38:10.687988ZDouble17444.3144531Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
32024-06-22T12:39:10.687988ZDouble17566.6210941Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
42024-06-22T12:40:10.687988ZDouble17619.8750001Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
...........................
42307152024-06-23T12:31:10.687988ZDouble15664.3242191Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307162024-06-23T12:32:10.687988ZDouble15755.3398441Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307172024-06-23T12:33:10.687988ZDouble15869.0927731Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307182024-06-23T12:34:10.687988ZDouble15772.4853521Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307192024-06-23T12:35:10.687988ZDouble15702.3242191Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
\n", - "

4230720 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " Timestamp ValueType Value StatusCode \\\n", - "0 2024-06-22T12:36:10.687988Z Double 17479.763672 1 \n", - "1 2024-06-22T12:37:10.687988Z Double 17516.169922 1 \n", - "2 2024-06-22T12:38:10.687988Z Double 17444.314453 1 \n", - "3 2024-06-22T12:39:10.687988Z Double 17566.621094 1 \n", - "4 2024-06-22T12:40:10.687988Z Double 17619.875000 1 \n", - "... ... ... ... ... \n", - "4230715 2024-06-23T12:31:10.687988Z Double 15664.324219 1 \n", - "4230716 2024-06-23T12:32:10.687988Z Double 15755.339844 1 \n", - "4230717 2024-06-23T12:33:10.687988Z Double 15869.092773 1 \n", - "4230718 2024-06-23T12:34:10.687988Z Double 15772.485352 1 \n", - "4230719 2024-06-23T12:35:10.687988Z Double 15702.324219 1 \n", - "\n", - " StatusSymbol IdType \\\n", - "0 Good 1 \n", - "1 Good 1 \n", - "2 Good 1 \n", - "3 Good 1 \n", - "4 Good 1 \n", - "... ... ... \n", - "4230715 Good 1 \n", - "4230716 Good 1 \n", - "4230717 Good 1 \n", - "4230718 Good 1 \n", - "4230719 Good 1 \n", - "\n", - " Id Namespace \n", - "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "... ... ... \n", - "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "\n", - "[4230720 rows x 8 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async1(\n", - " opc_data,\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=10000,\n", - " max_concurrent_requests=40\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "import asyncio\n", - "import aiohttp\n", - "from aiohttp import ClientSession\n", - "from asyncio import Semaphore\n", - "\n", - "async def get_historical_aggregated_values_batch_time_vars_async(self, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list, max_data_points: int = 10000, max_retries: int = 3, retry_delay: int = 5, max_concurrent_requests: int = 10) -> pd.DataFrame:\n", - " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - " logger = logging.getLogger(__name__)\n", - " # Convert variable list to the required format\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - "\n", - " # Calculate total time range in milliseconds\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - "\n", - " # Estimate the number of intervals based on the processing interval\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - "\n", - " # Calculate the maximum number of variables that can be processed in each batch\n", - " max_variables_per_batch = max(1, int(max_data_points / 
estimated_intervals))\n", - "\n", - " # Split variables into batches\n", - " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", - "\n", - " # Calculate the number of time batches needed based on max data points and estimated intervals\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - "\n", - " # Calculate time batch size in milliseconds\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - "\n", - " async def process_batch(variables, time_batch):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " async with ClientSession() as session:\n", - " async with session.post(\n", - " f\"{self.rest_url}values/historicalaggregated\",\n", - " json=body,\n", - " headers=self.headers\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " break\n", - " except aiohttp.ClientError as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. 
Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " self._check_content(content)\n", - "\n", - " df_result = process_batch_api_response(content)\n", - " return df_result\n", - "\n", - " tasks = [\n", - " process_batch(variables, time_batch)\n", - " for variables in variable_batches\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " results = await asyncio.gather(*tasks)\n", - " all_results.extend(results)\n", - "\n", - " # Combine all batch results into a single DataFrame\n", - " logger.info(\"Combining all batches...\")\n", - " combined_df = pd.concat(all_results, ignore_index=True)\n", - " # Process and return the combined DataFrame\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return self._process_df(combined_df, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n", - " opc_data,\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=10000,\n", - " max_concurrent_requests=40\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async with ClientPool" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "import asyncio\n", - "import aiohttp\n", - "from aiohttp import ClientSession\n", - "from asyncio import Semaphore\n", - "from typing import List, Dict, Any\n", - "from datetime import datetime, timedelta\n", - "import pandas as pd\n", - "import logging\n", - "from pydantic import AnyUrl, ValidationError" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "class ClientPool:\n", - " def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]):\n", - " self.clients = asyncio.Queue()\n", - " for _ in range(num_clients):\n", - " self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers))\n", - " self.num_clients = num_clients\n", - "\n", - " async def get_client(self):\n", - " client = await self.clients.get()\n", - " return client\n", - "\n", - " async def release_client(self, client):\n", - " await self.clients.put(client)\n", - "\n", - " async def close_all(self):\n", - " while not self.clients.empty():\n", - " client = await self.clients.get()\n", - " await client.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "async def request_from_api_async(\n", - " client_pool: ClientPool,\n", - " method: str,\n", - " endpoint: str,\n", - " data: str = None,\n", - " params: Dict[str, Any] = None,\n", - " extended_timeout: bool = False,\n", - ") -> Dict[str, Any]:\n", - " timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)\n", - " client = await client_pool.get_client()\n", - " \n", - " try:\n", - " if method == \"GET\":\n", - " async with 
client.get(endpoint, params=params, timeout=timeout) as response:\n", - " response.raise_for_status()\n", - " if 'application/json' in response.headers.get('Content-Type', ''):\n", - " return await response.json()\n", - " else:\n", - " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n", - " elif method == \"POST\":\n", - " async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:\n", - " response.raise_for_status()\n", - " if 'application/json' in response.headers.get('Content-Type', ''):\n", - " return await response.json()\n", - " else:\n", - " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n", - " else:\n", - " raise ValidationError(\"Unsupported method\")\n", - " finally:\n", - " await client_pool.release_client(client)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "async def get_historical_aggregated_values_batch_time_vars_async(\n", - " self,\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " agg_name: str,\n", - " variable_list: List[str],\n", - " max_data_points: int = 100000,\n", - " max_retries: int = 3,\n", - " retry_delay: int = 5,\n", - " max_concurrent_requests: int = 50\n", - ") -> pd.DataFrame:\n", - " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - " logger = logging.getLogger(__name__)\n", - "\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", - "\n", - " async def process_batch(variables, time_batch):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " content = await request_from_api_async(\n", - " client_pool,\n", - " method=\"POST\",\n", - " endpoint=f\"/values/historicalaggregated\",\n", - " data=json.dumps(body, default=self.json_serial),\n", - " extended_timeout=True\n", - " )\n", - " break\n", - " except (aiohttp.ClientError, ValidationError) as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. 
Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " self._check_content(content)\n", - "\n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", - " ['HistoryReadResults', 'NodeId','Id'],\n", - " ['HistoryReadResults', 'NodeId','Namespace']]\n", - " )\n", - " return df_result\n", - "\n", - " tasks = [\n", - " process_batch(variables, time_batch)\n", - " for variables in variable_batches\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " try:\n", - " results = await asyncio.gather(*tasks)\n", - " all_results.extend(results)\n", - "\n", - " logger.info(\"Combining all batches...\")\n", - " combined_df = pd.concat(all_results, ignore_index=True)\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return self._process_df(combined_df, columns)\n", - " finally:\n", - " await client_pool.close_all()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-07-22 14:13:58,459 - INFO - Combining all batches...\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TimestampValueTypeValueStatusCodeStatusSymbolIdTypeIdNamespace
02024-06-22T14:07:19.691118ZDouble13861.3906251Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
12024-06-22T14:08:19.691118ZDouble13998.0800781Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
22024-06-22T14:09:19.691118ZDouble13927.2734381Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
32024-06-22T14:10:19.691118ZDouble13916.4589841Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
42024-06-22T14:11:19.691118ZDouble13997.4316411Good1SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower3
...........................
42307152024-06-23T14:02:19.691118ZDouble13705.1594051Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307162024-06-23T14:03:19.691118ZDouble13593.9042971Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307172024-06-23T14:04:19.691118ZDouble13629.4355471Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307182024-06-23T14:05:19.691118ZDouble13530.1406251Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
42307192024-06-23T14:06:19.691118ZDouble13501.5791021Good1SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower3
\n", - "

4230720 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " Timestamp ValueType Value StatusCode \\\n", - "0 2024-06-22T14:07:19.691118Z Double 13861.390625 1 \n", - "1 2024-06-22T14:08:19.691118Z Double 13998.080078 1 \n", - "2 2024-06-22T14:09:19.691118Z Double 13927.273438 1 \n", - "3 2024-06-22T14:10:19.691118Z Double 13916.458984 1 \n", - "4 2024-06-22T14:11:19.691118Z Double 13997.431641 1 \n", - "... ... ... ... ... \n", - "4230715 2024-06-23T14:02:19.691118Z Double 13705.159405 1 \n", - "4230716 2024-06-23T14:03:19.691118Z Double 13593.904297 1 \n", - "4230717 2024-06-23T14:04:19.691118Z Double 13629.435547 1 \n", - "4230718 2024-06-23T14:05:19.691118Z Double 13530.140625 1 \n", - "4230719 2024-06-23T14:06:19.691118Z Double 13501.579102 1 \n", - "\n", - " StatusSymbol IdType \\\n", - "0 Good 1 \n", - "1 Good 1 \n", - "2 Good 1 \n", - "3 Good 1 \n", - "4 Good 1 \n", - "... ... ... \n", - "4230715 Good 1 \n", - "4230716 Good 1 \n", - "4230717 Good 1 \n", - "4230718 Good 1 \n", - "4230719 Good 1 \n", - "\n", - " Id Namespace \n", - "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n", - "... ... ... \n", - "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n", - "\n", - "[4230720 rows x 8 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n", - " opc_data,\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=10000,\n", - " max_concurrent_requests=50\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async with Data Handler" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "import asyncio\n", - "import aiohttp\n", - "import pandas as pd\n", - "import sqlite3\n", - "import tempfile\n", - "import os\n", - "import json\n", - "from asyncio import Semaphore\n", - "from typing import List, Dict, Any\n", - "from datetime import datetime, timedelta\n", - "import logging\n", - "import pyarrow as pa\n", - "import pyarrow.parquet as pq" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "class DataHandler:\n", - " def __init__(self, max_memory_rows=10000):\n", - " self.max_memory_rows = max_memory_rows\n", - " self.temp_dir = tempfile.mkdtemp()\n", - " self.db_path = os.path.join(self.temp_dir, 'temp_data.db')\n", - " self.conn = sqlite3.connect(self.db_path)\n", - " self.conn.execute('''CREATE TABLE IF NOT EXISTS temp_data\n", - " (id INTEGER PRIMARY KEY AUTOINCREMENT,\n", - " batch_id TEXT,\n", - " data TEXT)''')\n", - "\n", - " async def save_data(self, batch_id: str, data: pd.DataFrame):\n", - " if len(data) <= self.max_memory_rows:\n", - " # Store small datasets directly in SQLite\n", - " 
self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n", - " (batch_id, data.to_json()))\n", - " else:\n", - " # Stream larger datasets to Parquet file\n", - " file_path = os.path.join(self.temp_dir, f\"batch_{batch_id}.parquet\")\n", - " table = pa.Table.from_pandas(data)\n", - " pq.write_table(table, file_path)\n", - " \n", - " # Store file path in SQLite\n", - " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n", - " (batch_id, file_path))\n", - " self.conn.commit()\n", - "\n", - " async def get_data(self, batch_id: str) -> pd.DataFrame:\n", - " cursor = self.conn.execute(\"SELECT data FROM temp_data WHERE batch_id = ?\", (batch_id,))\n", - " result = cursor.fetchone()\n", - " if result:\n", - " data = result[0]\n", - " if data.startswith('{'): # JSON data\n", - " return pd.read_json(data)\n", - " else: # File path\n", - " return pd.read_parquet(data)\n", - " return None\n", - "\n", - " def cleanup(self):\n", - " self.conn.close()\n", - " for file in os.listdir(self.temp_dir):\n", - " os.remove(os.path.join(self.temp_dir, file))\n", - " os.rmdir(self.temp_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "async def get_historical_aggregated_values_batch_time_vars_data_async(\n", - " self,\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " agg_name: str,\n", - " variable_list: List[str],\n", - " max_data_points: int = 1000,\n", - " max_retries: int = 3,\n", - " retry_delay: int = 5,\n", - " max_concurrent_requests: int = 10\n", - ") -> pd.DataFrame:\n", - " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - " logger = logging.getLogger(__name__)\n", - "\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", - " data_handler = DataHandler()\n", - "\n", - " async def process_batch(vid, variables, time_batch):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " content = await request_from_api_async(\n", - " client_pool,\n", - " method=\"POST\",\n", - " endpoint=f\"/values/historicalaggregated\",\n", - " 
data=json.dumps(body, default=self.json_serial),\n", - " extended_timeout=True\n", - " )\n", - " break\n", - " except (aiohttp.ClientError, ValidationError) as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " self._check_content(content)\n", - "\n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", - " ['HistoryReadResults', 'NodeId','Id'],\n", - " ['HistoryReadResults', 'NodeId','Namespace']]\n", - " )\n", - " batch_id = f\"{time_batch}_{vid}\"\n", - " await data_handler.save_data(batch_id, df_result)\n", - " return batch_id\n", - "\n", - " tasks = [\n", - " process_batch(vid,variables, time_batch)\n", - " for vid,variables in enumerate(variable_batches)\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " try:\n", - " batch_ids = await asyncio.gather(*tasks)\n", - " # for batch_id in batch_ids:\n", - " # df = await data_handler.get_data(batch_id)\n", - " # all_results.append(df)\n", - "\n", - " # logger.info(\"Combining all batches...\")\n", - " # combined_df = pd.concat(all_results, ignore_index=True)\n", - " # columns = {\n", - " # \"Value.Type\": \"ValueType\",\n", - " # \"Value.Body\": \"Value\",\n", - " # \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " # \"StatusCode.Code\": \"StatusCode\",\n", - " # \"SourceTimestamp\": \"Timestamp\",\n", - " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " # }\n", - " # return self._process_df(combined_df, columns)\n", - " finally:\n", - " await client_pool.close_all()\n", - " data_handler.cleanup()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async(\n", - " opc_data,\n", - " start_time=start_time,\n", - " end_time=end_time,\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=20000,\n", - " max_concurrent_requests=50\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async with parquet data handler for large data" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "import asyncio\n", - "import aiohttp\n", - "import pandas as pd\n", - "import pyarrow as pa\n", - "import pyarrow.parquet as pq\n", - "from datetime import datetime, timedelta\n", - "import json\n", - "from typing import List, Dict, Any\n", - "import logging\n", - "from asyncio import Semaphore\n", - "from aiohttp import TCPConnector\n", - "from tenacity import retry, stop_after_attempt, wait_exponential\n", - "from concurrent.futures import ThreadPoolExecutor\n", - "\n", - "import tracemalloc\n", - "tracemalloc.start()\n", - "\n", - "logger = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "class AsyncParquetWriter:\n", - " def 
__init__(self, filename):\n", - " self.filename = filename\n", - " self.writer = None\n", - " self.executor = ThreadPoolExecutor(max_workers=10)\n", - "\n", - " async def write(self, df):\n", - " loop = asyncio.get_running_loop()\n", - " table = pa.Table.from_pandas(df)\n", - " if self.writer is None:\n", - " self.writer = pq.ParquetWriter(self.filename, table.schema)\n", - " await loop.run_in_executor(self.executor, self.writer.write_table, table)\n", - "\n", - " async def close(self):\n", - " if self.writer:\n", - " loop = asyncio.get_running_loop()\n", - " await loop.run_in_executor(self.executor, self.writer.close)\n", - " self.writer = None\n", - "\n", - "class DataHandler:\n", - " def __init__(self, base_path):\n", - " self.base_path = base_path\n", - " self.writers = {}\n", - "\n", - " async def save_data(self, batch_id: str, data: pd.DataFrame):\n", - " if batch_id not in self.writers:\n", - " self.writers[batch_id] = AsyncParquetWriter(f\"{self.base_path}/batch_{batch_id}.parquet\")\n", - " await self.writers[batch_id].write(data)\n", - "\n", - " async def close_all(self):\n", - " for writer in self.writers.values():\n", - " await writer.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "async def get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n", - " self,\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " agg_name: str,\n", - " variable_list: List[str],\n", - " max_data_points: int = 100000,\n", - " max_retries: int = 3,\n", - " retry_delay: int = 5,\n", - " max_concurrent_requests: int = 50\n", - ") -> pd.DataFrame:\n", - " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", - " logger = logging.getLogger(__name__)\n", - "\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n", - " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n", - " estimated_intervals = total_time_range_ms / pro_interval\n", - " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n", - " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n", - " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n", - " time_batch_size_ms = total_time_range_ms / max_time_batches\n", - "\n", - " all_results = []\n", - " semaphore = Semaphore(max_concurrent_requests)\n", - " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n", - " data_handler = DataHandler(base_path=\"pqfiles\")\n", - "\n", - " async def process_batch(vid, variables, time_batch):\n", - " async with semaphore:\n", - " batch_start_ms = time_batch * time_batch_size_ms\n", - " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n", - " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n", - " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n", - "\n", - " body = {\n", - " **self.body,\n", - " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"ReadValueIds\": variables,\n", - " \"AggregateName\": agg_name\n", - " }\n", - "\n", - " for attempt in range(max_retries):\n", - " try:\n", - " content = await request_from_api_async(\n", - " client_pool,\n", - " 
method=\"POST\",\n", - " endpoint=f\"/values/historicalaggregated\",\n", - " data=json.dumps(body, default=self.json_serial),\n", - " extended_timeout=True\n", - " )\n", - " break\n", - " except (aiohttp.ClientError, ValidationError) as e:\n", - " if attempt < max_retries - 1:\n", - " wait_time = retry_delay * (2 ** attempt)\n", - " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n", - " await asyncio.sleep(wait_time)\n", - " else:\n", - " logger.error(f\"Max retries reached. Error: {e}\")\n", - " raise RuntimeError(f'Error message {e}')\n", - "\n", - " self._check_content(content)\n", - "\n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n", - " ['HistoryReadResults', 'NodeId','Id'],\n", - " ['HistoryReadResults', 'NodeId','Namespace']]\n", - " )\n", - " batch_id = f\"{time_batch}_{vid}\"\n", - " await data_handler.save_data(batch_id, df_result)\n", - " return batch_id\n", - "\n", - " tasks = [\n", - " process_batch(vid,variables, time_batch)\n", - " for vid,variables in enumerate(variable_batches)\n", - " for time_batch in range(max_time_batches)\n", - " ]\n", - "\n", - " try:\n", - " batch_ids = await asyncio.gather(*tasks)\n", - " # for batch_id in batch_ids:\n", - " # df = await data_handler.get_data(batch_id)\n", - " # all_results.append(df)\n", - "\n", - " # logger.info(\"Combining all batches...\")\n", - " # combined_df = pd.concat(all_results, ignore_index=True)\n", - " # columns = {\n", - " # \"Value.Type\": \"ValueType\",\n", - " # \"Value.Body\": \"Value\",\n", - " # \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " # \"StatusCode.Code\": \"StatusCode\",\n", - " # \"SourceTimestamp\": \"Timestamp\",\n", - " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " # }\n", - " # return self._process_df(combined_df, columns)\n", - " finally:\n", - " await client_pool.close_all()\n", - " await data_handler.close_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-07-18 12:29:48,821 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:48,825 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:48,830 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:48,836 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,941 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,950 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,952 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,955 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,958 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,965 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,968 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,970 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,973 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,975 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,976 - WARNING - Request failed. 
Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,982 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,986 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,989 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,991 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,994 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,997 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:54,999 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,003 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,008 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,010 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,015 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,018 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,022 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,025 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,027 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:29:55,030 - WARNING - Request failed. Retrying in 5 seconds...\n" - ] - }, - { - "ename": "RuntimeError", - "evalue": "BadSecureChannelClosed", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[35], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# 1 day aggregated historical data\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m one_day_historical_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n\u001b[0;32m 3\u001b[0m opc_data,\n\u001b[0;32m 4\u001b[0m start_time\u001b[38;5;241m=\u001b[39mdatetime(\u001b[38;5;241m2024\u001b[39m,\u001b[38;5;241m6\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m00\u001b[39m,\u001b[38;5;241m00\u001b[39m),\n\u001b[0;32m 5\u001b[0m end_time\u001b[38;5;241m=\u001b[39mdatetime(\u001b[38;5;241m2024\u001b[39m,\u001b[38;5;241m6\u001b[39m,\u001b[38;5;241m2\u001b[39m,\u001b[38;5;241m00\u001b[39m,\u001b[38;5;241m00\u001b[39m),\n\u001b[0;32m 6\u001b[0m pro_interval\u001b[38;5;241m=\u001b[39mpro_interval,\n\u001b[0;32m 7\u001b[0m agg_name\u001b[38;5;241m=\u001b[39magg_name,\n\u001b[0;32m 8\u001b[0m variable_list\u001b[38;5;241m=\u001b[39mvariable_list,\n\u001b[0;32m 9\u001b[0m max_data_points\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50000\u001b[39m,\n\u001b[0;32m 10\u001b[0m max_concurrent_requests\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m one_day_historical_data\n", - "Cell \u001b[1;32mIn[33], line 84\u001b[0m, in \u001b[0;36mget_historical_aggregated_values_batch_time_vars_data_async_parquet\u001b[1;34m(self, start_time, end_time, pro_interval, agg_name, variable_list, max_data_points, max_retries, retry_delay, max_concurrent_requests)\u001b[0m\n\u001b[0;32m 77\u001b[0m tasks \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 78\u001b[0m process_batch(vid,variables, time_batch)\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m vid,variables \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28menumerate\u001b[39m(variable_batches)\n\u001b[0;32m 80\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m time_batch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_time_batches)\n\u001b[0;32m 81\u001b[0m ]\n\u001b[0;32m 83\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 84\u001b[0m batch_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mtasks)\n\u001b[0;32m 85\u001b[0m \u001b[38;5;66;03m# for batch_id in batch_ids:\u001b[39;00m\n\u001b[0;32m 86\u001b[0m \u001b[38;5;66;03m# df = await data_handler.get_data(batch_id)\u001b[39;00m\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# all_results.append(df)\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[38;5;66;03m# return self._process_df(combined_df, columns)\u001b[39;00m\n\u001b[0;32m 102\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 103\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m client_pool\u001b[38;5;241m.\u001b[39mclose_all()\n", - "Cell \u001b[1;32mIn[33], line 64\u001b[0m, in \u001b[0;36mget_historical_aggregated_values_batch_time_vars_data_async_parquet..process_batch\u001b[1;34m(vid, variables, time_batch)\u001b[0m\n\u001b[0;32m 61\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMax retries reached. Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 62\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError message \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_content\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 66\u001b[0m df_result \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mjson_normalize(\n\u001b[0;32m 67\u001b[0m content, \n\u001b[0;32m 68\u001b[0m record_path\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDataValues\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 71\u001b[0m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNodeId\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNamespace\u001b[39m\u001b[38;5;124m'\u001b[39m]]\n\u001b[0;32m 72\u001b[0m )\n\u001b[0;32m 73\u001b[0m batch_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime_batch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n", - "File \u001b[1;32m~\\OneDrive - TGS Prediktor AS\\Dokumenter\\git_repos\\pyPrediktorMapClient\\src\\pyprediktormapclient\\opc_ua.py:319\u001b[0m, in \u001b[0;36mOPC_UA._check_content\u001b[1;34m(self, content)\u001b[0m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo content returned from the 
server\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 319\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mErrorMessage\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m 320\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m content:\n\u001b[0;32m 321\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mErrorMessage\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", - "\u001b[1;31mRuntimeError\u001b[0m: BadSecureChannelClosed" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-07-18 12:31:33,268 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,271 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,274 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,276 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,278 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,282 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,286 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,292 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,297 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:31:33,301 - WARNING - Request failed. Retrying in 5 seconds...\n", - "2024-07-18 12:32:37,429 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:42,844 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:42,847 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:42,849 - WARNING - Request failed. 
Retrying in 10 seconds...\n", - "2024-07-18 12:32:45,402 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,404 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,406 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,409 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,412 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,414 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,415 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,417 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,420 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,423 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,424 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,425 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,427 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,429 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,430 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,431 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,433 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,435 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,437 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,438 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,439 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,441 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,443 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,446 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,448 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,449 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,451 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,452 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,453 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,455 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,456 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,458 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,459 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,461 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,462 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,463 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,464 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,466 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,468 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,470 - ERROR - Unclosed client 
session\n", - "client_session: \n", - "2024-07-18 12:32:45,471 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,473 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,475 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,476 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,477 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,478 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,480 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,482 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,483 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,485 - ERROR - Unclosed client session\n", - "client_session: \n", - "2024-07-18 12:32:45,513 - ERROR - Unclosed connector\n", - "connections: ['[(, 103057.281)]']\n", - "connector: \n", - "2024-07-18 12:32:50,890 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,892 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,894 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,896 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,897 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,901 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,904 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,909 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:32:50,911 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:02,993 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:02,995 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:02,996 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,000 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,002 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,005 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,006 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,010 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,012 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,018 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,020 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,022 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,025 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,026 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:03,029 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:09,145 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:09,147 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:09,149 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:42,948 - WARNING - Request failed. Retrying in 10 seconds...\n", - "2024-07-18 12:33:50,613 - WARNING - Request failed. 
Retrying in 10 seconds...\n",
    - "[... dozens of further near-identical output lines trimmed: 'WARNING - Request failed. Retrying in 5/10 seconds...' and 'ERROR - Max retries reached. Error: 504, message=Gateway Time-out, url=https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated' ...]\n",
    - "2024-07-18 12:37:38,227 - ERROR - Max retries reached.
Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n" - ] - } - ], - "source": [ - "# 1 day aggregated historical data\n", - "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n", - " opc_data,\n", - " start_time=datetime(2024,6,1,00,00),\n", - " end_time=datetime(2024,6,2,00,00),\n", - " pro_interval=pro_interval,\n", - " agg_name=agg_name,\n", - " variable_list=variable_list,\n", - " max_data_points=50000,\n", - " max_concurrent_requests=50\n", - ")\n", - "one_day_historical_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Stringset data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_historical_aggregated_values(opc_data,\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " agg_name, \n", - " variable_list\n", - ") -> pd.DataFrame:\n", - " \n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]\n", - "\n", - " body = {\n", - " **opc_data.body, \n", - " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", - " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", - " \"ProcessingInterval\": pro_interval, \n", - " \"AggregateName\": agg_name,\n", - " \"ReadValueIds\": extended_variables\n", - " }\n", - " print(body)\n", - "\n", - " content = request_from_api(\n", - " rest_url=opcua_rest_url, \n", - " method=\"POST\", \n", - " endpoint=\"values/historicalaggregated\", \n", - " data=json.dumps(body, default=opc_data.json_serial), \n", - " headers=opc_data.headers, \n", - " extended_timeout=True\n", - " )\n", - " print(content)\n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']\n", - " ]\n", - " )\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " return opc_data._process_df(df_result, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n", - "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n", - "pro_interval=600000\n", - "agg_name=\"Average\"\n", - "variable_list=string_sets.variables_as_list([\"DCPower\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_historical_aggregated_values(opc_data,\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " agg_name, \n", - " variable_list) -> pd.DataFrame:\n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - " batch_size = 100\n", - " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n", - " \n", - " combined_df = pd.DataFrame() \n", - " \n", - " for batch in batches:\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": 
agg_name} for var in batch]\n", - " \n", - " body = {\n", - " **opc_data.body, \n", - " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", - " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n", - " \"ProcessingInterval\": pro_interval, \n", - " \"AggregateName\": agg_name,\n", - " \"ReadValueIds\": extended_variables\n", - " }\n", - " \n", - " content = request_from_api(\n", - " rest_url=opcua_rest_url, \n", - " method=\"POST\", \n", - " endpoint=\"values/historicalaggregated\", \n", - " data=json.dumps(body, default=opc_data.json_serial), \n", - " headers=opc_data.headers, \n", - " extended_timeout=True\n", - " )\n", - " \n", - " df_result = pd.json_normalize(\n", - " content, \n", - " record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']]\n", - " )\n", - " \n", - " if combined_df.empty:\n", - " combined_df = df_result\n", - " else:\n", - " combined_df = pd.concat([combined_df, df_result], ignore_index=True)\n", - " \n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - " \n", - " return opc_data._process_df(combined_df, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_historical_aggregated_values(opc_data,\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " agg_name, \n", - " variable_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import hashlib\n", - "import concurrent.futures" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_historical_aggregated_values(opc_data, start_time, end_time, pro_interval, agg_name, variable_list) -> pd.DataFrame:\n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - " batch_size = 150\n", - " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n", - "\n", - " def process_batch(batch):\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n", - " body = {\n", - " **opc_data.body,\n", - " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", - " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"AggregateName\": agg_name,\n", - " \"ReadValueIds\": extended_variables\n", - " }\n", - " content = request_from_api(\n", - " rest_url=opcua_rest_url,\n", - " method=\"POST\",\n", - " endpoint=\"values/historicalaggregated\",\n", - " data=json.dumps(body, default=opc_data.json_serial),\n", - " headers=opc_data.headers,\n", - " extended_timeout=True\n", - " )\n", - " return pd.json_normalize(\n", - " content,\n", - " record_path=['HistoryReadResults', 'DataValues'],\n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId', 'Id'], ['HistoryReadResults', 'NodeId', 'Namespace']]\n", - " )\n", - "\n", - " dataframes = []\n", - " with concurrent.futures.ThreadPoolExecutor() as executor:\n", - " future_to_batch = 
{executor.submit(process_batch, batch): batch for batch in batches}\n", - " for future in concurrent.futures.as_completed(future_to_batch):\n", - " dataframes.append(future.result())\n", - "\n", - " combined_df = pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()\n", - "\n", - " columns = {\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n", - " \"HistoryReadResults.NodeId.Id\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n", - " }\n", - "\n", - " return opc_data._process_df(combined_df, columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vars = opc_data._get_variable_list_as_list(variable_list)\n", - "extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "body = {\n", - " **opc_data.body,\n", - " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", - " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n", - " \"ProcessingInterval\": pro_interval,\n", - " \"AggregateName\": agg_name,\n", - " \"ReadValueIds\": extended_variables\n", - "}\n", - "body" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_historical_aggregated_values(opc_data,\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " agg_name, \n", - " variable_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start_time = datetime.now() - relativedelta(months=1)\n", - "end_time = datetime.now()\n", - "get_historical_aggregated_values(opc_data,\n", - " start_time, \n", - " end_time, \n", - " pro_interval, \n", - " agg_name, \n", - " variable_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# History data for 1 day, 10 min aggregate - stringsets\n", - "history_agg = opc_data.get_historical_aggregated_values(\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", - " pro_interval=600000,\n", - " agg_name=\"Average\",\n", - " variable_list=inverters.variables_as_list([\"DCPower\"]),\n", - ")\n", - "history_agg" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "import math\n", - "from pydantic import BaseModel, AnyUrl\n", - "from datetime import timedelta\n", - "import asyncio\n", - "import aiohttp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class Variables(BaseModel):\n", - " \"\"\"Helper class to parse all values api's.\n", - " Variables are described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.1/\n", - "\n", - " Variables:\n", - " Id: str - Id of the signal, e.g. SSO.EG-AS.WeatherSymbol\n", - " Namespace: int - Namespace on the signal, e.g. 
2.\n", - " IdType: int - IdTypes described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.3/.\n", - " \"\"\"\n", - " Id: str\n", - " Namespace: int\n", - " IdType: int" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n", - " \"\"\"Make API request for the given time range and variable list\"\"\"\n", - "\n", - " # Creating a new variable list to remove pydantic models\n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - "\n", - " extended_variables = [\n", - " {\n", - " \"NodeId\": var,\n", - " \"AggregateName\": agg_name,\n", - " }\n", - " for var in vars\n", - " ]\n", - " body = copy.deepcopy(opc_data.body)\n", - " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"ProcessingInterval\"] = pro_interval\n", - " body[\"ReadValueIds\"] = extended_variables\n", - " body[\"AggregateName\"] = agg_name\n", - "\n", - " # Make API request using aiohttp session\n", - " async with aiohttp.ClientSession() as session:\n", - " async with session.post(\n", - " f\"{opcua_rest_url}values/historicalaggregated\",\n", - " data=json.dumps(body, default=opc_data.json_serial),\n", - " headers=opc_data.headers,\n", - " timeout=aiohttp.ClientTimeout(total=None) \n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - "\n", - " return content" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vars = opc_data._get_variable_list_as_list(variable_list)\n", - "vars1 = vars[0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "extended_variables = [\n", - " {\n", - " \"NodeId\": var,\n", - " \"AggregateName\": agg_name,\n", - " }\n", - " for var in vars1\n", - "]\n", - "len(extended_variables)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "body = copy.deepcopy(opc_data.body)\n", - "body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - "body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - "body[\"ProcessingInterval\"] = pro_interval\n", - "body[\"ReadValueIds\"] = extended_variables\n", - "body[\"AggregateName\"] = agg_name\n", - "body" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f\"{opcua_rest_url}values/historicalaggregated\"," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data=json.dumps(body, default=opc_data.json_serial)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_dict = json.loads(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "read_value_ids = data_dict['ReadValueIds']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(read_value_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "headers=opc_data.headers\n", - "headers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "timeout=aiohttp.ClientTimeout(total=None) \n", - "timeout" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async with aiohttp.ClientSession() as session:\n", - " async with session.post(\n", - " f\"{opcua_rest_url}values/historicalaggregated\",\n", - " data=json.dumps(body, default=opc_data.json_serial),\n", - " headers=opc_data.headers,\n", - " timeout=aiohttp.ClientTimeout(total=None) \n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "content" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, batch_size: int) -> list[tuple]:\n", - " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n", - "\n", - " total_time_range = end_time - start_time\n", - " pro_interval_seconds = (pro_interval / 1000)\n", - " total_data_points = (total_time_range.total_seconds() // pro_interval_seconds) + 1\n", - "\n", - " total_batches = math.ceil(total_data_points / batch_size)\n", - " actual_batch_size = math.ceil(total_data_points / total_batches)\n", - "\n", - " time_batches = [\n", - " (start_time + timedelta(seconds=(i * actual_batch_size * pro_interval_seconds)),\n", - " start_time + timedelta(seconds=((i + 1) * actual_batch_size * pro_interval_seconds)) - timedelta(seconds=pro_interval_seconds))\n", - " for i in range(total_batches)\n", - " ]\n", - "\n", - " return time_batches" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_variable_batches(variable_list: list[Variables], batch_size: int) -> list[list[Variables]]:\n", - " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n", - "\n", - " variable_batches = [\n", - " variable_list[i:i + batch_size] for i in range(0, len(variable_list), batch_size)\n", - " ]\n", - "\n", - " return variable_batches" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def process_api_response(opc_data, response: dict) -> pd.DataFrame:\n", - " \"\"\"Process the API response and return the result dataframe\"\"\"\n", - " \n", - " df_result = pd.json_normalize(response, record_path=['HistoryReadResults', 'DataValues'], \n", - " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],\n", - " ['HistoryReadResults', 'NodeId','Namespace']] )\n", - "\n", - " for i, row in df_result.iterrows():\n", - " if not math.isnan(row[\"Value.Type\"]):\n", - " value_type = opc_data._get_value_type(int(row[\"Value.Type\"])).get(\"type\")\n", - " df_result.at[i, \"Value.Type\"] = str(value_type)\n", - "\n", - " df_result.rename(\n", - " columns={\n", - " \"Value.Type\": \"ValueType\",\n", - " \"Value.Body\": \"Value\",\n", - " \"StatusCode.Symbol\": \"StatusSymbol\",\n", - " \"StatusCode.Code\": \"StatusCode\",\n", - " \"SourceTimestamp\": \"Timestamp\",\n", - " \"HistoryReadResults.NodeId.IdType\": \"Id\",\n", - " \"HistoryReadResults.NodeId.Namespace\": 
\"Namespace\",\n", - " },\n", - " errors=\"raise\",\n", - " inplace=True,\n", - " )\n", - "\n", - " return df_result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def get_historical_aggregated_values_async(\n", - " opc_data,\n", - " start_time: datetime,\n", - " end_time: datetime,\n", - " pro_interval: int,\n", - " agg_name: str,\n", - " variable_list: list[Variables],\n", - " batch_size: int = 1000\n", - ") -> pd.DataFrame:\n", - " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n", - "\n", - " \n", - " time_batches = generate_time_batches(start_time, end_time, pro_interval, batch_size)\n", - " variable_batches = generate_variable_batches(variable_list, batch_size)\n", - "\n", - " # Creating tasks for each API request and gathering the results\n", - " tasks = []\n", - "\n", - " for time_batch_start, time_batch_end in time_batches:\n", - " for variable_sublist in variable_batches:\n", - " task = asyncio.create_task(\n", - " make_async_api_request(opc_data, time_batch_start, time_batch_end, pro_interval, agg_name, variable_sublist)\n", - " ) \n", - " tasks.append(task)\n", - " \n", - " # Execute all tasks concurrently and gather their results\n", - " responses = await asyncio.gather(*tasks)\n", - " \n", - " # Processing the API responses\n", - " result_list = []\n", - " for idx, batch_response in enumerate(responses):\n", - " \n", - " batch_result = process_api_response(opc_data, batch_response)\n", - " result_list.append(batch_result)\n", - " \n", - " result_df = pd.concat(result_list, ignore_index=True)\n", - "\n", - " return result_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical inverter data in asyncio process\n", - "one_days_historic_inverter_data2 = await get_historical_aggregated_values_async(\n", - " opc_data,\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", - " pro_interval=60*1000,\n", - " agg_name=\"Average\",\n", - " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n", - " batch_size=100\n", - ")\n", - "one_days_historic_inverter_data2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def generate_time_chunks(start_time: datetime, end_time: datetime):\n", - " \"\"\"Generate time chunks between start_time and end_time, each chunk_duration_minutes long.\"\"\"\n", - " delta = timedelta(minutes=60)\n", - " current_time = start_time\n", - " while current_time < end_time:\n", - " chunk_end_time = min(current_time + delta, end_time)\n", - " yield (current_time, chunk_end_time)\n", - " current_time = chunk_end_time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables], max_data_points=500) -> dict:\n", - " \"\"\"Make API request for the given time range and variable list, with additional chunking based on data points.\"\"\"\n", - "\n", - " def chunk_list(lst, n):\n", - " \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n", - " for i in range(0, len(lst), n):\n", - " yield lst[i:i + n]\n", - "\n", - " async def fetch_data_for_time_period(session, vars_chunk, start, end):\n", - " 
\"\"\"Fetch data for a given time period and chunk of variables.\"\"\"\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n", - " body = copy.deepcopy(opc_data.body)\n", - " body[\"StartTime\"] = start.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"EndTime\"] = end.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"ProcessingInterval\"] = pro_interval\n", - " body[\"ReadValueIds\"] = extended_variables\n", - " body[\"AggregateName\"] = agg_name\n", - "\n", - " async with session.post(\n", - " f\"{opcua_rest_url}values/historicalaggregated\",\n", - " data=json.dumps(body, default=str),\n", - " headers=opc_data.headers,\n", - " timeout=aiohttp.ClientTimeout(total=None)\n", - " ) as response:\n", - " response.raise_for_status()\n", - " return await response.json()\n", - "\n", - " # Creating a new variable list to remove pydantic models\n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - " chunk_size = 5 # Chunk size for node IDs\n", - " vars_chunks = list(chunk_list(vars, chunk_size))\n", - "\n", - " all_responses = []\n", - " async with aiohttp.ClientSession() as session:\n", - " for vars_chunk in vars_chunks:\n", - " # Generate time chunks for the given time period\n", - " async for start, end in generate_time_chunks(start_time, end_time):\n", - " content = await fetch_data_for_time_period(session, vars_chunk, start, end)\n", - " all_responses.append(content)\n", - " return all_responses" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n", - " \"\"\"Make API request for the given time range and variable list\"\"\"\n", - "\n", - " def chunk_list(lst, n):\n", - " for i in range(0, len(lst), n):\n", - " yield lst[i:i + n]\n", - "\n", - " # Creating a new variable list to remove pydantic models\n", - " vars = opc_data._get_variable_list_as_list(variable_list)\n", - "\n", - " chunk_size = 150 \n", - " vars_chunks = list(chunk_list(vars, chunk_size))\n", - "\n", - " all_responses = []\n", - " async with aiohttp.ClientSession() as session:\n", - " for vars_chunk in vars_chunks:\n", - " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n", - " body = copy.deepcopy(opc_data.body)\n", - " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " body[\"ProcessingInterval\"] = pro_interval\n", - " body[\"ReadValueIds\"] = extended_variables\n", - " body[\"AggregateName\"] = agg_name\n", - "\n", - " async with session.post(\n", - " f\"{opcua_rest_url}values/historicalaggregated\",\n", - " data=json.dumps(body, default=str),\n", - " headers=opc_data.headers,\n", - " timeout=aiohttp.ClientTimeout(total=None)\n", - " ) as response:\n", - " response.raise_for_status()\n", - " content = await response.json()\n", - " all_responses.append(content) \n", - "\n", - " return all_responses" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime, timedelta\n", - "from typing import List, Tuple" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_time_chunks(start_time: datetime, end_time: datetime, interval_hours: int) -> 
List[Tuple[datetime, datetime]]:\n", - " \"\"\"Generate time chunks within the given start and end time with specified interval in hours.\"\"\"\n", - " delta = timedelta(hours=interval_hours)\n", - " current_time = start_time\n", - " chunks = []\n", - "\n", - " while current_time < end_time:\n", - " chunk_end_time = min(current_time + delta, end_time) \n", - " chunks.append((current_time, chunk_end_time))\n", - " current_time += delta\n", - "\n", - " return chunks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 day aggregated historical inverter data in asyncio process\n", - "one_days_historic_inverter_data2 = await make_async_api_request(\n", - " opc_data,\n", - " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n", - " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n", - " pro_interval=60*1000,\n", - " agg_name=\"Average\",\n", - " variable_list=string_sets.variables_as_list([\"DCPower\"])\n", - ")\n", - "one_days_historic_inverter_data2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.12.1 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "6b866f0bc560289bf4bb2415ae9074243764eb008c10d00a1da29433677418de" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/pyprediktormapclient/opc_ua.py b/src/pyprediktormapclient/opc_ua.py index fb1cbb9..3c0e8b0 100644 --- a/src/pyprediktormapclient/opc_ua.py +++ b/src/pyprediktormapclient/opc_ua.py @@ -1,20 +1,18 @@ import json -import math import logging -import datetime import copy import pandas as pd +import requests from datetime import date, datetime, timedelta from typing import Dict, List, Any, Union, Optional -from pydantic import BaseModel, AnyUrl, validate_call, ValidationError +from pydantic import BaseModel, AnyUrl from pydantic_core import Url -from pyprediktormapclient.shared import request_from_api, request_from_api_async, ClientPool +from pyprediktormapclient.shared import request_from_api from requests import HTTPError -from aiohttp import ClientSession -from asyncio import Semaphore import asyncio -import requests import aiohttp +from aiohttp import ClientSession +from asyncio import Semaphore logger = logging.getLogger(__name__) @@ -146,7 +144,6 @@ class OPC_UA: class Config: arbitrary_types_allowed = True - def __init__(self, rest_url: AnyUrl, opcua_url: AnyUrl, namespaces: List = None, auth_client: object = None, session: requests.Session = None): """Class initializer @@ -469,70 +466,31 @@ def get_historical_aggregated_values(self, } return self._process_df(df_result, columns) - async def _fetch_data_async(self, endpoint: str, body: Dict, max_retries:int, retry_delay:int) -> pd.DataFrame: - """ - Fetch data from the API and return it as a DataFrame. 
- """ - for attempt in range(max_retries): - try: - async with ClientSession() as session: - async with session.post( - url=self.rest_url + endpoint, - json=body, - headers=self.headers - ) as response: - response.raise_for_status() - content = await response.json() - except aiohttp.ClientError as e: - if attempt < max_retries - 1: - wait_time = retry_delay * (2 ** attempt) - logger.warning(f"Request failed. Retrying in {wait_time} seconds...") - await asyncio.sleep(wait_time) - else: - logger.error(f"Max retries reached. Error: {e}") - raise RuntimeError(f'Error message {e}') - self._check_content(content) - - df_list = [] - for item in content["HistoryReadResults"]: - df = pd.json_normalize(item["DataValues"]) - for key, value in item["NodeId"].items(): - df[f"HistoryReadResults.NodeId.{key}"] = value - df_list.append(df) - - if df_list: - df_result = pd.concat(df_list) - df_result.reset_index(inplace=True, drop=True) - return df_result - - return df_result - - async def get_historical_aggregated_values_async( - self, - start_time: datetime, - end_time: datetime, - pro_interval: int, - agg_name: str, - variable_list: List[Variables], - max_data_points: int = 10000, - max_retries: int = 3, - retry_delay: int = 5, - max_concurrent_requests: int = 10 + async def get_raw_historical_values_async( + self, + start_time: datetime, + end_time: datetime, + variable_list: list, + limit_start_index: Union[int, None] = None, + limit_num_records: Union[int, None] = None, + max_data_points: int = 10000, + max_retries: int = 3, + retry_delay: int = 5, + max_concurrent_requests: int = 35 ) -> pd.DataFrame: + """Request historical aggregated values from the OPC UA server with batching""" total_time_range_ms = (end_time - start_time).total_seconds() * 1000 - estimated_intervals = total_time_range_ms / pro_interval - + estimated_intervals = total_time_range_ms / max_data_points + max_variables_per_batch = max(1, int(max_data_points / estimated_intervals)) max_time_batches = max(1, int(estimated_intervals / max_data_points)) time_batch_size_ms = total_time_range_ms / max_time_batches - extended_variables = [{"NodeId": var, "AggregateName": agg_name} for var in variable_list] - variable_batches = [ - extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch) - ] + extended_variables = [{"NodeId": var} for var in variable_list] + variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)] all_results = [] semaphore = Semaphore(max_concurrent_requests) @@ -544,18 +502,49 @@ async def process_batch(variables, time_batch): batch_start = start_time + timedelta(milliseconds=batch_start_ms) batch_end = start_time + timedelta(milliseconds=batch_end_ms) - additional_params = { - "ProcessingInterval": pro_interval, - "AggregateName": agg_name + body = { + **self.body, + "StartTime": batch_start.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + "EndTime": batch_end.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + "ReadValueIds": variables, } - body = self._prepare_body( - batch_start, - batch_end, - variables, - additional_params - ) - df_result = await self._fetch_data_async("values/historicalaggregated", body, max_retries, retry_delay) - return df_result + + if limit_start_index is not None and limit_num_records is not None: + body["Limit"] = {"StartIndex": limit_start_index, "NumRecords": limit_num_records} + + for attempt in range(max_retries): + try: + async with ClientSession() as session: + async with 
session.post( + f"{self.rest_url}values/historical", + json=body, + headers=self.headers + ) as response: + response.raise_for_status() + content = await response.json() + break + except aiohttp.ClientError as e: + if attempt < max_retries - 1: + wait_time = retry_delay * (2 ** attempt) + logger.warning(f"Request failed. Retrying in {wait_time} seconds...") + await asyncio.sleep(wait_time) + else: + logger.error(f"Max retries reached. Error: {e}") + raise RuntimeError(f'Error message {e}') + + self._check_content(content) + + df_list = [] + for item in content["HistoryReadResults"]: + df = pd.json_normalize(item["DataValues"]) + for key, value in item["NodeId"].items(): + df[f"HistoryReadResults.NodeId.{key}"] = value + df_list.append(df) + + if df_list: + df_result = pd.concat(df_list) + df_result.reset_index(inplace=True, drop=True) + return df_result tasks = [ process_batch(variables, time_batch) @@ -567,35 +556,33 @@ async def process_batch(variables, time_batch): all_results.extend(results) logger.info("Combining all batches...") - combined_df = pd.concat(results, ignore_index=True) - + combined_df = pd.concat(all_results, ignore_index=True) columns = { - "Value.Type": "ValueType", - "Value.Body": "Value", - "StatusCode.Symbol": "StatusSymbol", - "StatusCode.Code": "StatusCode", - "SourceTimestamp": "Timestamp", - "HistoryReadResults.NodeId.IdType": "IdType", - "HistoryReadResults.NodeId.Id": "Id", - "HistoryReadResults.NodeId.Namespace": "Namespace", + "Value.Type": "ValueType", + "Value.Body": "Value", + "StatusCode.Symbol": "StatusSymbol", + "StatusCode.Code": "StatusCode", + "SourceTimestamp": "Timestamp", + "HistoryReadResults.NodeId.IdType": "IdType", + "HistoryReadResults.NodeId.Id": "Id", + "HistoryReadResults.NodeId.Namespace": "Namespace", } return self._process_df(combined_df, columns) - - async def get_historical_aggregated_values_batch_time_vars_async( + + async def get_historical_aggregated_values_async( self, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, - variable_list: List[str], - max_data_points: int = 100000, + variable_list: list, + max_data_points: int = 10000, max_retries: int = 3, retry_delay: int = 5, - max_concurrent_requests: int = 50 + max_concurrent_requests: int = 35 ) -> pd.DataFrame: - logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - logger = logging.getLogger(__name__) + """Request historical aggregated values from the OPC UA server with batching""" total_time_range_ms = (end_time - start_time).total_seconds() * 1000 estimated_intervals = total_time_range_ms / pro_interval @@ -604,14 +591,15 @@ async def get_historical_aggregated_values_batch_time_vars_async( max_time_batches = max(1, int(estimated_intervals / max_data_points)) time_batch_size_ms = total_time_range_ms / max_time_batches - extended_variables = [{"NodeId": var, "AggregateName": agg_name} for var in variable_list] + extended_variables = [ + {"NodeId": var, "AggregateName": agg_name} for var in variable_list + ] variable_batches = [ extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch) ] all_results = [] semaphore = Semaphore(max_concurrent_requests) - client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers) async def process_batch(variables, time_batch): async with semaphore: @@ -628,18 +616,18 @@ async def process_batch(variables, time_batch): "ReadValueIds": variables, "AggregateName": agg_name } - for attempt in 
range(max_retries): try: - content = await request_from_api_async( - client_pool, - method="POST", - endpoint=f"/values/historicalaggregated", - data=json.dumps(body, default=self.json_serial), - extended_timeout=True - ) - break - except (aiohttp.ClientError, ValidationError) as e: + async with ClientSession() as session: + async with session.post( + f"{self.rest_url}values/historicalaggregated", + json=body, + headers=self.headers + ) as response: + response.raise_for_status() + content = await response.json() + break + except aiohttp.ClientError as e: if attempt < max_retries - 1: wait_time = retry_delay * (2 ** attempt) logger.warning(f"Request failed. Retrying in {wait_time} seconds...") @@ -668,25 +656,24 @@ async def process_batch(variables, time_batch): for time_batch in range(max_time_batches) ] - try: - results = await asyncio.gather(*tasks) - all_results.extend(results) - - logger.info("Combining all batches...") - combined_df = pd.concat(all_results, ignore_index=True) - columns = { - "Value.Type": "ValueType", - "Value.Body": "Value", - "StatusCode.Symbol": "StatusSymbol", - "StatusCode.Code": "StatusCode", - "SourceTimestamp": "Timestamp", - "HistoryReadResults.NodeId.IdType": "IdType", - "HistoryReadResults.NodeId.Id": "Id", - "HistoryReadResults.NodeId.Namespace": "Namespace", - } - return self._process_df(combined_df, columns) - finally: - await client_pool.close_all() + results = await asyncio.gather(*tasks) + all_results.extend(results) + + logger.info("Combining all batches...") + combined_df = pd.concat(results, ignore_index=True) + + columns = { + "Value.Type": "ValueType", + "Value.Body": "Value", + "StatusCode.Symbol": "StatusSymbol", + "StatusCode.Code": "StatusCode", + "SourceTimestamp": "Timestamp", + "HistoryReadResults.NodeId.IdType": "IdType", + "HistoryReadResults.NodeId.Id": "Id", + "HistoryReadResults.NodeId.Namespace": "Namespace", + } + return self._process_df(combined_df, columns) + def write_values(self, variable_list: List[WriteVariables]) -> List: """Request to write realtime values to the OPC UA server diff --git a/src/pyprediktormapclient/shared.py b/src/pyprediktormapclient/shared.py index b111ce1..09511f9 100644 --- a/src/pyprediktormapclient/shared.py +++ b/src/pyprediktormapclient/shared.py @@ -7,25 +7,6 @@ class Config: arbitrary_types_allowed = True -class ClientPool: - def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]): - self.clients = asyncio.Queue() - for _ in range(num_clients): - self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers)) - self.num_clients = num_clients - - async def get_client(self): - client = await self.clients.get() - return client - - async def release_client(self, client): - await self.clients.put(client) - - async def close_all(self): - while not self.clients.empty(): - client = await self.clients.get() - await client.close() - def request_from_api( rest_url: AnyUrl, method: Literal["GET", "POST"], @@ -71,34 +52,3 @@ def request_from_api( else: return {"error": "Non-JSON response", "content": result.text} - -async def request_from_api_async( - client_pool: ClientPool, - method: str, - endpoint: str, - data: str = None, - params: Dict[str, Any] = None, - extended_timeout: bool = False, -) -> Dict[str, Any]: - timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30) - client = await client_pool.get_client() - - try: - if method == "GET": - async with client.get(endpoint, params=params, timeout=timeout) as response: - response.raise_for_status() - if 
'application/json' in response.headers.get('Content-Type', ''):
-                    return await response.json()
-                else:
-                    return {"error": "Non-JSON response", "content": await response.text()}
-        elif method == "POST":
-            async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:
-                response.raise_for_status()
-                if 'application/json' in response.headers.get('Content-Type', ''):
-                    return await response.json()
-                else:
-                    return {"error": "Non-JSON response", "content": await response.text()}
-        else:
-            raise ValidationError("Unsupported method")
-    finally:
-        await client_pool.release_client(client)
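
Note on the retry pattern introduced above: the reworked async methods in opc_ua.py replace the removed ClientPool / request_from_api_async helpers with a per-request aiohttp.ClientSession, an asyncio.Semaphore that caps concurrency (max_concurrent_requests), and exponential-backoff retries (retry_delay * 2**attempt). The sketch below shows that pattern in isolation. It is a simplified illustration under stated assumptions, not part of the pyprediktormapclient API; the helper names (post_with_retries, gather_batches) and the bare url/headers/bodies arguments are invented for the example.

# Simplified sketch (not library code): bounded-concurrency POSTs with
# exponential-backoff retries, mirroring the pattern used by the new
# get_raw_historical_values_async / get_historical_aggregated_values_async.
import asyncio
import logging

import aiohttp

logger = logging.getLogger(__name__)


async def post_with_retries(url: str, body: dict, headers: dict,
                            max_retries: int = 3, retry_delay: int = 5) -> dict:
    """POST one batch body, retrying transient failures with exponential backoff."""
    for attempt in range(max_retries):
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=body, headers=headers) as response:
                    response.raise_for_status()
                    return await response.json()
        except aiohttp.ClientError as e:
            if attempt < max_retries - 1:
                wait_time = retry_delay * (2 ** attempt)  # 5 s, 10 s, 20 s, ...
                logger.warning(f"Request failed. Retrying in {wait_time} seconds...")
                await asyncio.sleep(wait_time)
            else:
                logger.error(f"Max retries reached. Error: {e}")
                raise RuntimeError(f"Error message {e}")


async def gather_batches(url: str, bodies: list, headers: dict,
                         max_concurrent_requests: int = 35) -> list:
    """Issue one request per batch body, never exceeding max_concurrent_requests in flight."""
    semaphore = asyncio.Semaphore(max_concurrent_requests)

    async def one_request(body: dict) -> dict:
        async with semaphore:
            return await post_with_retries(url, body, headers)

    return await asyncio.gather(*(one_request(b) for b in bodies))

Under those assumptions, the per-batch request bodies built inside get_historical_aggregated_values_async could be dispatched as results = await gather_batches(f"{rest_url}values/historicalaggregated", bodies, headers), with the resulting JSON payloads normalized into a DataFrame afterwards, as the patched methods do.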