diff --git a/notebooks/Exploring_API_Functions_Authentication.ipynb b/notebooks/Exploring_API_Functions_Authentication.ipynb
index bb03e1c..85c023a 100644
--- a/notebooks/Exploring_API_Functions_Authentication.ipynb
+++ b/notebooks/Exploring_API_Functions_Authentication.ipynb
@@ -250,23 +250,6 @@
"opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# History data\n",
- "inverters_hist_df = opc_data.get_historical_aggregated_values(\n",
- " start_time=datetime.datetime.now() - datetime.timedelta(2),\n",
- " end_time=datetime.datetime.now() - datetime.timedelta(1),\n",
- " pro_interval=600000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=inverters.variables_as_list([\"DCPower\"]),\n",
- ")\n",
- "inverters_hist_df"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -286,32 +269,13 @@
"metadata": {},
"outputs": [],
"source": [
- "# Historic value data of trackers, 1 days worth of data 30 days ago\n",
- "one_day_historic_tracker_data = opc_data.get_historical_aggregated_values(\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
- " pro_interval=3600000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"]),\n",
- ")\n",
- "one_day_historic_tracker_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical inverter data in asyncio process\n",
- "one_days_historic_inverter_data2 = await opc_data.get_historical_aggregated_values_async(\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
- " pro_interval=60*1000,\n",
- " agg_name=\"Average\",\n",
+ "# 1 day raw historical data\n",
+ "one_day_raw_historical_data = await opc_data.get_raw_historical_values_async(\n",
+ " start_time = datetime.datetime(2023, 11, 13, 00, 00),\n",
+ " end_time = datetime.datetime(2023, 11, 13, 23, 59),\n",
" variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"])\n",
")\n",
- "one_days_historic_inverter_data2"
+ "one_day_raw_historical_data"
]
},
{
@@ -321,9 +285,9 @@
"outputs": [],
"source": [
"# 1 day aggregated historical data\n",
- "one_day_historical_data = await opc_data.get_historical_aggregated_values_batch_time_vars_async(\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
+ "one_day_historical_data = await opc_data.get_historical_aggregated_values_async(\n",
+ " start_time = datetime.datetime(2023, 11, 13, 00, 00),\n",
+ " end_time = datetime.datetime(2023, 11, 13, 23, 59),\n",
" pro_interval=60*1000,\n",
" agg_name=\"Average\",\n",
" variable_list=string_sets_for_first_park.variables_as_list([\"DCPower\"])\n",
diff --git a/notebooks/api_performance_testing.ipynb b/notebooks/api_performance_testing.ipynb
new file mode 100644
index 0000000..89517a2
--- /dev/null
+++ b/notebooks/api_performance_testing.ipynb
@@ -0,0 +1,2189 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook explores both model index and opc ua scripts and contain examples of all the functions to make request to model index api and opc ua api servers. "
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Import Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import the required packeages\n",
+ "import pandas as pd\n",
+ "import os\n",
+ "import json\n",
+ "import datetime\n",
+ "import concurrent.futures\n",
+ "from dotenv import load_dotenv\n",
+ "from pathlib import Path\n",
+ "from dateutil.relativedelta import relativedelta"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Import Scripts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import model index functions\n",
+ "from pyprediktormapclient.model_index import ModelIndex\n",
+ "\n",
+ "# Import OPC UA functions\n",
+ "from pyprediktormapclient.opc_ua import OPC_UA\n",
+ "\n",
+ "# Import Analytics Helper\n",
+ "from pyprediktormapclient.analytics_helper import AnalyticsHelper\n",
+ "\n",
+ "# Import \"Dataframer\" Tools\n",
+ "from pyprediktormapclient.shared import *\n",
+ "\n",
+ "# import AUTH_CLIENT\n",
+ "from pyprediktormapclient.auth_client import AUTH_CLIENT"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Consider obtaining the envrionment variables from .env file if you are running this locally from source.\n",
+ "dotenv_path = Path(\".env\")\n",
+ "load_dotenv(dotenv_path=dotenv_path)\n",
+ "\n",
+ "username = os.environ[\"USERNAME\"]\n",
+ "password = os.environ[\"PASSWORD\"]\n",
+ "opcua_rest_url = os.environ[\"OPC_UA_REST_URL\"]\n",
+ "opcua_server_url = os.environ[\"OPC_UA_SERVER_URL\"]\n",
+ "model_index_url = os.environ[\"MODEL_INDEX_URL\"]\n",
+ "ory_url = os.environ[\"ORY_URL\"]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Getting ory bearer token\n",
+ "auth_client = AUTH_CLIENT(rest_url=ory_url, username=username, password=password)\n",
+ "auth_client.request_new_ory_token()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connecting to ModelIndex APIs \n",
+ "model = ModelIndex(url=model_index_url, auth_client=auth_client, session=auth_client.session)\n",
+ "\n",
+ "# Listed sites on the model index api server\n",
+ "namespaces = model.get_namespace_array()\n",
+ "# Types of Objects\n",
+ "object_types_json = model.get_object_types()\n",
+ "object_types = AnalyticsHelper(object_types_json)\n",
+ "namespace_list = object_types.namespaces_as_list(namespaces)\n",
+ "\n",
+ "# Initate the OPC UA API with a fixed namespace list\n",
+ "opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Download data from modelindex api"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Unique types of Objects\n",
+ "object_types_unique = object_types.dataframe[[\"Id\", \"Name\"]].drop_duplicates()\n",
+ "object_types_unique"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To get the objects of a type\n",
+ "sites_json = model.get_objects_of_type(\"SiteType\")\n",
+ "\n",
+ "# Send the returned JSON into a normalizer to get Id, Type, Name, Props and Vars as columns\n",
+ "sites = AnalyticsHelper(sites_json)\n",
+ "sites.list_of_names()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Analytics helper\n",
+ "sites.variables_as_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sites.list_of_ids()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Selecting the second site\n",
+ "first_site_id = sites.list_of_ids()[0]\n",
+ "# first_site_id = '14:1:BE.DK-ADU'\n",
+ "first_site_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get all stringsets for one park\n",
+ "string_sets_for_first_park_as_json = model.get_object_descendants(\n",
+ " \"StringSetType\", [first_site_id], \"PV_Assets\"\n",
+ ")\n",
+ "string_sets = AnalyticsHelper(string_sets_for_first_park_as_json)\n",
+ "string_sets.dataframe"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Query Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "variable_list =string_sets.variables_as_list([\"DCPower\"])\n",
+ "start_time = datetime.datetime(2023, 11, 13, 00, 00)\n",
+ "end_time = datetime.datetime(2023, 11, 13, 23, 59)\n",
+ "pro_interval=60*1000\n",
+ "agg_name=\"Average\""
+ ]
+ },
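+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Optional sanity check (a sketch added for illustration, not part of the original workflow): estimate how many rows the aggregated query above should return. The batching functions below size their variable and time batches from exactly this kind of estimate."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Rough row-count estimate for the query parameters above (illustrative only;\n",
+ "# the exact count depends on how the server aggregates the interval).\n",
+ "total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ "estimated_intervals = total_time_range_ms / pro_interval\n",
+ "estimated_rows = int(estimated_intervals) * len(variable_list)\n",
+ "print(f\"~{int(estimated_intervals)} intervals x {len(variable_list)} variables ~ {estimated_rows} rows\")"
+ ]
+ },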
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Batching with Async Refactoring"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "import asyncio\n",
+ "import aiohttp\n",
+ "from aiohttp import ClientSession\n",
+ "from asyncio import Semaphore\n",
+ "from datetime import timedelta\n",
+ "from typing import Dict, List, Tuple\n",
+ "\n",
+ "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+ "logger = logging.getLogger(__name__)\n",
+ "\n",
+ "async def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, max_data_points: int) -> List[tuple]:\n",
+ " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n",
+ "\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / pro_interval\n",
+ " \n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ "\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " return total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches\n",
+ "\n",
+ "def generate_variable_batches(start_time, end_time, pro_interval, variable_list: List[Dict[str, str]], max_data_points) -> List:\n",
+ " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
+ " max_variables_per_batch = generate_time_batches(start_time, end_time, pro_interval, max_data_points)[1]\n",
+ "\n",
+ " variable_batches = [\n",
+ " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n",
+ " ]\n",
+ "\n",
+ " return variable_batches\n",
+ "\n",
+ "def _prepare_body(\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " variable_list: List[Dict[str, str]], \n",
+ " agg_name: str,\n",
+ " ) -> Dict:\n",
+ " \"\"\"\n",
+ " Prepare the request body for the API call.\n",
+ " \"\"\"\n",
+ " total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches = generate_time_batches(\n",
+ " start_time, end_time, pro_interval, 10000)\n",
+ "\n",
+ " for time_batch in range(max_time_batches):\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " variable_batches = generate_variable_batches(variable_list)\n",
+ "\n",
+ " for variables in variable_batches:\n",
+ " body = {\n",
+ " **opc_data.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"ReadValueIds\": variables,\n",
+ " \"AggregateName\": agg_name\n",
+ " }\n",
+ " return body\n",
+ " \n",
+ "def process_batch(content: dict) -> pd.DataFrame:\n",
+ " \"\"\" Process individual batch of data \"\"\"\n",
+ " \n",
+ " df_list = []\n",
+ " for item in content[\"HistoryReadResults\"]:\n",
+ " df = pd.json_normalize(item[\"DataValues\"])\n",
+ " for key, value in item[\"NodeId\"].items():\n",
+ " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
+ " df_list.append(df)\n",
+ " \n",
+ " if df_list:\n",
+ " df_result = pd.concat(df_list)\n",
+ " df_result.reset_index(inplace=True, drop=True)\n",
+ " return df_result\n",
+ " else:\n",
+ " return pd.DataFrame()\n",
+ " \n",
+ "async def make_async_api_request(opc_data, start_time:datetime, end_time:datetime,\n",
+ " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n",
+ " semaphore, max_retries: int = 3, retry_delay: int = 5) -> dict:\n",
+ " \n",
+ " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
+ "\n",
+ " async with semaphore:\n",
+ " body = _prepare_body(\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " variable_list,\n",
+ " agg_name\n",
+ " )\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " async with ClientSession() as session:\n",
+ " async with session.post(\n",
+ " f\"{opcua_rest_url}values/historicalaggregated\",\n",
+ " json=body,\n",
+ " headers=opc_data.headers\n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()\n",
+ " break\n",
+ " except aiohttp.ClientError as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " opc_data._check_content(content)\n",
+ "\n",
+ " df_result = process_batch(content)\n",
+ " return df_result\n",
+ " \n",
+ "async def process_api_response(opc_data, start_time:datetime, end_time:datetime,\n",
+ " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n",
+ " max_concurrent_requests: int = 10) -> pd.DataFrame:\n",
+ " \"\"\" Process API response asynchronously and return the result dataframe \"\"\"\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ "\n",
+ " tasks = [\n",
+ " make_async_api_request(opc_data, start_time, end_time, pro_interval, variable_list, agg_name, semaphore)\n",
+ " ]\n",
+ " results = await asyncio.gather(*tasks)\n",
+ " all_results.extend(results)\n",
+ " \n",
+ " if all_results:\n",
+ " combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " combined_df.reset_index(inplace=True, drop=True)\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " return opc_data._process_df(combined_df, columns)\n",
+ " else:\n",
+ " return pd.DataFrame()\n",
+ " \n",
+ "async def get_historical_aggregated_values_async(\n",
+ " opc_data,\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " variable_list: List[Dict[str, str]],\n",
+ " agg_name: str,\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
+ "\n",
+ " \n",
+ " result_df = await process_api_response(opc_data, start_time, end_time, pro_interval, variable_list, agg_name)\n",
+ "\n",
+ " return result_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical inverter data in asyncio process\n",
+ "one_days_historic_inverter_data2 = await get_historical_aggregated_values_batch_time_vars_async(\n",
+ " start_time=start_time,\n",
+ " end_time=end_time,\n",
+ " pro_interval=60*1000,\n",
+ " agg_name=\"Average\",\n",
+ " variable_list=string_sets.variables_as_list([\"DCPower\"])\n",
+ ")\n",
+ "one_days_historic_inverter_data2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Batching with Async"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "import asyncio\n",
+ "import aiohttp\n",
+ "from aiohttp import ClientSession\n",
+ "from asyncio import Semaphore\n",
+ "from datetime import timedelta\n",
+ "\n",
+ "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+ "logger = logging.getLogger(__name__)"
+ ]
+ },
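+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A toy illustration (added sketch, no network calls) of how the Semaphore used below throttles concurrency: only the configured number of coroutines may enter the guarded block at a time, the rest wait."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Toy demonstration of Semaphore-based throttling (purely illustrative).\n",
+ "async def _throttled(i, sem):\n",
+ "    async with sem:\n",
+ "        await asyncio.sleep(0.1)  # stand-in for an HTTP request\n",
+ "        return i\n",
+ "\n",
+ "sem = Semaphore(3)  # at most 3 \"requests\" in flight at once\n",
+ "await asyncio.gather(*[_throttled(i, sem) for i in range(10)])"
+ ]
+ },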
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_historical_aggregated_values_batch_time_vars_async(\n",
+ " self, \n",
+ " start_time: datetime, \n",
+ " end_time: datetime, \n",
+ " pro_interval: int, \n",
+ " agg_name: str, \n",
+ " variable_list: list, \n",
+ " max_data_points: int = 10000, \n",
+ " max_retries: int = 3, \n",
+ " retry_delay: int = 5, \n",
+ " max_concurrent_requests: int = 10\n",
+ ") -> pd.DataFrame:\n",
+ " \n",
+ " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
+ "\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / pro_interval\n",
+ "\n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
+ " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
+ "\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ "\n",
+ " async def process_batch(variables, time_batch):\n",
+ " async with semaphore:\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " body = {\n",
+ " **self.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"ReadValueIds\": variables,\n",
+ " \"AggregateName\": agg_name\n",
+ " }\n",
+ " print(body)\n",
+ "\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " async with ClientSession() as session:\n",
+ " async with session.post(\n",
+ " f\"{self.rest_url}values/historicalaggregated\",\n",
+ " json=body,\n",
+ " headers=self.headers\n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()\n",
+ " break\n",
+ " except aiohttp.ClientError as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " self._check_content(content)\n",
+ "\n",
+ " df_list = []\n",
+ " for item in content[\"HistoryReadResults\"]:\n",
+ " df = pd.json_normalize(item[\"DataValues\"])\n",
+ " for key, value in item[\"NodeId\"].items():\n",
+ " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
+ " df_list.append(df)\n",
+ " \n",
+ " if df_list:\n",
+ " df_result = pd.concat(df_list)\n",
+ " df_result.reset_index(inplace=True, drop=True)\n",
+ " return df_result\n",
+ "\n",
+ " tasks = [\n",
+ " process_batch(variables, time_batch)\n",
+ " for variables in variable_batches\n",
+ " for time_batch in range(max_time_batches)\n",
+ " ]\n",
+ "\n",
+ " results = await asyncio.gather(*tasks)\n",
+ " all_results.extend(results)\n",
+ "\n",
+ " logger.info(\"Combining all batches...\")\n",
+ " combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " return self._process_df(combined_df, columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical data\n",
+ "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n",
+ " opc_data,\n",
+ " start_time=start_time,\n",
+ " end_time=end_time,\n",
+ " pro_interval=pro_interval,\n",
+ " agg_name=agg_name,\n",
+ " variable_list=variable_list,\n",
+ " max_data_points=10000,\n",
+ " max_concurrent_requests=40\n",
+ ")\n",
+ "one_day_historical_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Batching with Async for Raw Historical Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import Dict, List, Any, Union, Optional"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_raw_historical_values_batch_time_vars_async(\n",
+ " self, \n",
+ " start_time: datetime, \n",
+ " end_time: datetime, \n",
+ " variable_list: list, \n",
+ " limit_start_index: Union[int, None] = None, \n",
+ " limit_num_records: Union[int, None] = None,\n",
+ " max_data_points: int = 10000, \n",
+ " max_retries: int = 3, \n",
+ " retry_delay: int = 5, \n",
+ " max_concurrent_requests: int = 10\n",
+ ") -> pd.DataFrame:\n",
+ " \n",
+ " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
+ "\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / max_data_points\n",
+ "\n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var} for var in variable_list]\n",
+ " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
+ "\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ "\n",
+ " async def process_batch(variables, time_batch):\n",
+ " async with semaphore:\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " body = {\n",
+ " **self.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ReadValueIds\": variables,\n",
+ " }\n",
+ " \n",
+ " if limit_start_index is not None and limit_num_records is not None:\n",
+ " body[\"Limit\"] = {\"StartIndex\": limit_start_index, \"NumRecords\": limit_num_records}\n",
+ "\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " async with ClientSession() as session:\n",
+ " async with session.post(\n",
+ " f\"{self.rest_url}values/historical\",\n",
+ " json=body,\n",
+ " headers=self.headers\n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()\n",
+ " break\n",
+ " except aiohttp.ClientError as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " self._check_content(content)\n",
+ "\n",
+ " df_list = []\n",
+ " for item in content[\"HistoryReadResults\"]:\n",
+ " df = pd.json_normalize(item[\"DataValues\"])\n",
+ " for key, value in item[\"NodeId\"].items():\n",
+ " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
+ " df_list.append(df)\n",
+ " \n",
+ " if df_list:\n",
+ " df_result = pd.concat(df_list)\n",
+ " df_result.reset_index(inplace=True, drop=True)\n",
+ " return df_result\n",
+ "\n",
+ " tasks = [\n",
+ " process_batch(variables, time_batch)\n",
+ " for variables in variable_batches\n",
+ " for time_batch in range(max_time_batches)\n",
+ " ]\n",
+ "\n",
+ " results = await asyncio.gather(*tasks)\n",
+ " all_results.extend(results)\n",
+ "\n",
+ " logger.info(\"Combining all batches...\")\n",
+ " combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " return self._process_df(combined_df, columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day raw historical data\n",
+ "one_day_raw_historical_data = await get_raw_historical_values_batch_time_vars_async(\n",
+ " opc_data,\n",
+ " start_time=start_time,\n",
+ " end_time=end_time,\n",
+ " variable_list=variable_list,\n",
+ " max_data_points=10000,\n",
+ " max_concurrent_requests=35\n",
+ ")\n",
+ "one_day_raw_historical_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Async with ClientPool"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "import aiohttp\n",
+ "from aiohttp import ClientSession\n",
+ "from asyncio import Semaphore\n",
+ "from typing import List, Dict, Any\n",
+ "from datetime import datetime, timedelta\n",
+ "import pandas as pd\n",
+ "import logging\n",
+ "from pydantic import AnyUrl, ValidationError"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class ClientPool:\n",
+ " def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]):\n",
+ " self.clients = asyncio.Queue()\n",
+ " for _ in range(num_clients):\n",
+ " self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers))\n",
+ " self.num_clients = num_clients\n",
+ "\n",
+ " async def get_client(self):\n",
+ " client = await self.clients.get()\n",
+ " return client\n",
+ "\n",
+ " async def release_client(self, client):\n",
+ " await self.clients.put(client)\n",
+ "\n",
+ " async def close_all(self):\n",
+ " while not self.clients.empty():\n",
+ " client = await self.clients.get()\n",
+ " await client.close()"
+ ]
+ },
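+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Minimal usage sketch for ClientPool (added for illustration; it assumes opcua_rest_url and opc_data.headers from the setup cells, and the endpoint name is a placeholder): acquire a pooled session, use it, release it, then close the pool."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative ClientPool usage; the endpoint below is a placeholder.\n",
+ "async def _demo_client_pool():\n",
+ "    pool = ClientPool(num_clients=2, rest_url=opcua_rest_url, headers=opc_data.headers)\n",
+ "    try:\n",
+ "        client = await pool.get_client()\n",
+ "        try:\n",
+ "            async with client.get(\"values\", timeout=aiohttp.ClientTimeout(total=30)) as resp:\n",
+ "                print(resp.status)\n",
+ "        finally:\n",
+ "            await pool.release_client(client)\n",
+ "    finally:\n",
+ "        await pool.close_all()\n",
+ "\n",
+ "# await _demo_client_pool()  # uncomment to run against a live server"
+ ]
+ },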
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def request_from_api_async(\n",
+ " client_pool: ClientPool,\n",
+ " method: str,\n",
+ " endpoint: str,\n",
+ " data: str = None,\n",
+ " params: Dict[str, Any] = None,\n",
+ " extended_timeout: bool = False,\n",
+ ") -> Dict[str, Any]:\n",
+ " timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)\n",
+ " client = await client_pool.get_client()\n",
+ " \n",
+ " try:\n",
+ " if method == \"GET\":\n",
+ " async with client.get(endpoint, params=params, timeout=timeout) as response:\n",
+ " response.raise_for_status()\n",
+ " if 'application/json' in response.headers.get('Content-Type', ''):\n",
+ " return await response.json()\n",
+ " else:\n",
+ " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n",
+ " elif method == \"POST\":\n",
+ " async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:\n",
+ " response.raise_for_status()\n",
+ " if 'application/json' in response.headers.get('Content-Type', ''):\n",
+ " return await response.json()\n",
+ " else:\n",
+ " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n",
+ " else:\n",
+ " raise ValidationError(\"Unsupported method\")\n",
+ " finally:\n",
+ " await client_pool.release_client(client)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_historical_aggregated_values_batch_time_vars_async(\n",
+ " self,\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " agg_name: str,\n",
+ " variable_list: List[str],\n",
+ " max_data_points: int = 100000,\n",
+ " max_retries: int = 3,\n",
+ " retry_delay: int = 5,\n",
+ " max_concurrent_requests: int = 55\n",
+ ") -> pd.DataFrame:\n",
+ " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+ " logger = logging.getLogger(__name__)\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / pro_interval\n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
+ "\n",
+ " async def process_batch(variables, time_batch):\n",
+ " async with semaphore:\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " body = {\n",
+ " **self.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"ReadValueIds\": variables,\n",
+ " \"AggregateName\": agg_name\n",
+ " }\n",
+ "\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " content = await request_from_api_async(\n",
+ " client_pool,\n",
+ " method=\"POST\",\n",
+ " endpoint=f\"/values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=self.json_serial),\n",
+ " extended_timeout=True\n",
+ " )\n",
+ " break\n",
+ " except (aiohttp.ClientError, ValidationError) as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " self._check_content(content)\n",
+ "\n",
+ " df_list = []\n",
+ " for item in content[\"HistoryReadResults\"]:\n",
+ " df = pd.json_normalize(item[\"DataValues\"])\n",
+ " for key, value in item[\"NodeId\"].items():\n",
+ " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
+ " df_list.append(df)\n",
+ " \n",
+ " if df_list:\n",
+ " df_result = pd.concat(df_list)\n",
+ " df_result.reset_index(inplace=True, drop=True)\n",
+ " return df_result\n",
+ "\n",
+ " tasks = [\n",
+ " process_batch(variables, time_batch)\n",
+ " for variables in variable_batches\n",
+ " for time_batch in range(max_time_batches)\n",
+ " ]\n",
+ "\n",
+ " try:\n",
+ " results = await asyncio.gather(*tasks)\n",
+ " all_results.extend(results)\n",
+ "\n",
+ " logger.info(\"Combining all batches...\")\n",
+ " combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " return self._process_df(combined_df, columns)\n",
+ " finally:\n",
+ " await client_pool.close_all()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical data\n",
+ "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n",
+ " opc_data,\n",
+ " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
+ " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
+ " pro_interval=60*1000,\n",
+ " agg_name=\"Average\",\n",
+ " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n",
+ " max_data_points=10000,\n",
+ " max_concurrent_requests=100\n",
+ ")\n",
+ "one_day_historical_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Async with Data Handler"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "import aiohttp\n",
+ "import pandas as pd\n",
+ "import sqlite3\n",
+ "import tempfile\n",
+ "import os\n",
+ "import json\n",
+ "from asyncio import Semaphore\n",
+ "from typing import List, Dict, Any\n",
+ "from datetime import datetime, timedelta\n",
+ "import logging\n",
+ "import pyarrow as pa\n",
+ "import pyarrow.parquet as pq"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class DataHandler:\n",
+ " def __init__(self, max_memory_rows=10000):\n",
+ " self.max_memory_rows = max_memory_rows\n",
+ " self.temp_dir = tempfile.mkdtemp()\n",
+ " self.db_path = os.path.join(self.temp_dir, 'temp_data.db')\n",
+ " self.conn = sqlite3.connect(self.db_path)\n",
+ " self.conn.execute('''CREATE TABLE IF NOT EXISTS temp_data\n",
+ " (id INTEGER PRIMARY KEY AUTOINCREMENT,\n",
+ " batch_id TEXT,\n",
+ " data TEXT)''')\n",
+ "\n",
+ " async def save_data(self, batch_id: str, data: pd.DataFrame):\n",
+ " if len(data) <= self.max_memory_rows:\n",
+ " # Store small datasets directly in SQLite\n",
+ " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n",
+ " (batch_id, data.to_json()))\n",
+ " else:\n",
+ " # Stream larger datasets to Parquet file\n",
+ " file_path = os.path.join(self.temp_dir, f\"batch_{batch_id}.parquet\")\n",
+ " table = pa.Table.from_pandas(data)\n",
+ " pq.write_table(table, file_path)\n",
+ " \n",
+ " # Store file path in SQLite\n",
+ " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n",
+ " (batch_id, file_path))\n",
+ " self.conn.commit()\n",
+ "\n",
+ " async def get_data(self, batch_id: str) -> pd.DataFrame:\n",
+ " cursor = self.conn.execute(\"SELECT data FROM temp_data WHERE batch_id = ?\", (batch_id,))\n",
+ " result = cursor.fetchone()\n",
+ " if result:\n",
+ " data = result[0]\n",
+ " if data.startswith('{'): # JSON data\n",
+ " return pd.read_json(data)\n",
+ " else: # File path\n",
+ " return pd.read_parquet(data)\n",
+ " return None\n",
+ "\n",
+ " def cleanup(self):\n",
+ " self.conn.close()\n",
+ " for file in os.listdir(self.temp_dir):\n",
+ " os.remove(os.path.join(self.temp_dir, file))\n",
+ " os.rmdir(self.temp_dir)"
+ ]
+ },
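+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick roundtrip sketch for DataHandler (added for illustration; the DataFrame and batch id are made up): small frames go into SQLite as JSON, larger ones are written to Parquet files and only the file path is stored."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative save/get roundtrip with the SQLite-backed DataHandler.\n",
+ "handler = DataHandler(max_memory_rows=10000)\n",
+ "demo_df = pd.DataFrame({\"Value\": [1.0, 2.0], \"Timestamp\": [\"2023-11-13T00:00:00Z\", \"2023-11-13T00:01:00Z\"]})\n",
+ "await handler.save_data(\"demo_batch\", demo_df)\n",
+ "restored = await handler.get_data(\"demo_batch\")\n",
+ "handler.cleanup()\n",
+ "restored"
+ ]
+ },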
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_historical_aggregated_values_batch_time_vars_data_async(\n",
+ " self,\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " agg_name: str,\n",
+ " variable_list: List[str],\n",
+ " max_data_points: int = 1000,\n",
+ " max_retries: int = 3,\n",
+ " retry_delay: int = 5,\n",
+ " max_concurrent_requests: int = 10\n",
+ ") -> pd.DataFrame:\n",
+ " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+ " logger = logging.getLogger(__name__)\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / pro_interval\n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
+ " data_handler = DataHandler()\n",
+ "\n",
+ " async def process_batch(vid, variables, time_batch):\n",
+ " async with semaphore:\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " body = {\n",
+ " **self.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"ReadValueIds\": variables,\n",
+ " \"AggregateName\": agg_name\n",
+ " }\n",
+ "\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " content = await request_from_api_async(\n",
+ " client_pool,\n",
+ " method=\"POST\",\n",
+ " endpoint=f\"/values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=self.json_serial),\n",
+ " extended_timeout=True\n",
+ " )\n",
+ " break\n",
+ " except (aiohttp.ClientError, ValidationError) as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " self._check_content(content)\n",
+ "\n",
+ " df_result = pd.json_normalize(\n",
+ " content, \n",
+ " record_path=['HistoryReadResults', 'DataValues'], \n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
+ " ['HistoryReadResults', 'NodeId','Id'],\n",
+ " ['HistoryReadResults', 'NodeId','Namespace']]\n",
+ " )\n",
+ " batch_id = f\"{time_batch}_{vid}\"\n",
+ " await data_handler.save_data(batch_id, df_result)\n",
+ " return batch_id\n",
+ "\n",
+ " tasks = [\n",
+ " process_batch(vid,variables, time_batch)\n",
+ " for vid,variables in enumerate(variable_batches)\n",
+ " for time_batch in range(max_time_batches)\n",
+ " ]\n",
+ "\n",
+ " try:\n",
+ " batch_ids = await asyncio.gather(*tasks)\n",
+ " # for batch_id in batch_ids:\n",
+ " # df = await data_handler.get_data(batch_id)\n",
+ " # all_results.append(df)\n",
+ "\n",
+ " # logger.info(\"Combining all batches...\")\n",
+ " # combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " # columns = {\n",
+ " # \"Value.Type\": \"ValueType\",\n",
+ " # \"Value.Body\": \"Value\",\n",
+ " # \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " # \"StatusCode.Code\": \"StatusCode\",\n",
+ " # \"SourceTimestamp\": \"Timestamp\",\n",
+ " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " # }\n",
+ " # return self._process_df(combined_df, columns)\n",
+ " finally:\n",
+ " await client_pool.close_all()\n",
+ " data_handler.cleanup()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical data\n",
+ "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async(\n",
+ " opc_data,\n",
+ " start_time=start_time,\n",
+ " end_time=end_time,\n",
+ " pro_interval=pro_interval,\n",
+ " agg_name=agg_name,\n",
+ " variable_list=variable_list,\n",
+ " max_data_points=20000,\n",
+ " max_concurrent_requests=50\n",
+ ")\n",
+ "one_day_historical_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Async with parquet data handler for large data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "import aiohttp\n",
+ "import pandas as pd\n",
+ "import pyarrow as pa\n",
+ "import pyarrow.parquet as pq\n",
+ "from datetime import datetime, timedelta\n",
+ "import json\n",
+ "from typing import List, Dict, Any\n",
+ "import logging\n",
+ "from asyncio import Semaphore\n",
+ "from aiohttp import TCPConnector\n",
+ "from tenacity import retry, stop_after_attempt, wait_exponential\n",
+ "from concurrent.futures import ThreadPoolExecutor\n",
+ "\n",
+ "import tracemalloc\n",
+ "tracemalloc.start()\n",
+ "\n",
+ "logger = logging.getLogger(__name__)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class AsyncParquetWriter:\n",
+ " def __init__(self, filename):\n",
+ " self.filename = filename\n",
+ " self.writer = None\n",
+ " self.executor = ThreadPoolExecutor(max_workers=10)\n",
+ "\n",
+ " async def write(self, df):\n",
+ " loop = asyncio.get_running_loop()\n",
+ " table = pa.Table.from_pandas(df)\n",
+ " if self.writer is None:\n",
+ " self.writer = pq.ParquetWriter(self.filename, table.schema)\n",
+ " await loop.run_in_executor(self.executor, self.writer.write_table, table)\n",
+ "\n",
+ " async def close(self):\n",
+ " if self.writer:\n",
+ " loop = asyncio.get_running_loop()\n",
+ " await loop.run_in_executor(self.executor, self.writer.close)\n",
+ " self.writer = None\n",
+ "\n",
+ "class DataHandler:\n",
+ " def __init__(self, base_path):\n",
+ " self.base_path = base_path\n",
+ " self.writers = {}\n",
+ "\n",
+ " async def save_data(self, batch_id: str, data: pd.DataFrame):\n",
+ " if batch_id not in self.writers:\n",
+ " self.writers[batch_id] = AsyncParquetWriter(f\"{self.base_path}/batch_{batch_id}.parquet\")\n",
+ " await self.writers[batch_id].write(data)\n",
+ "\n",
+ " async def close_all(self):\n",
+ " for writer in self.writers.values():\n",
+ " await writer.close()"
+ ]
+ },
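+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Small usage sketch for the Parquet-backed handler (added for illustration): the writer does not create the output directory itself, so it is created here first; the DataFrame is a placeholder and \"pqfiles\" matches the base_path used in the function below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative AsyncParquetWriter/DataHandler usage.\n",
+ "import os\n",
+ "os.makedirs(\"pqfiles\", exist_ok=True)\n",
+ "\n",
+ "pq_handler = DataHandler(base_path=\"pqfiles\")\n",
+ "demo_df = pd.DataFrame({\"Value\": [1.0, 2.0]})\n",
+ "await pq_handler.save_data(\"demo\", demo_df)\n",
+ "await pq_handler.close_all()\n",
+ "pd.read_parquet(\"pqfiles/batch_demo.parquet\")"
+ ]
+ },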
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n",
+ " self,\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " agg_name: str,\n",
+ " variable_list: List[str],\n",
+ " max_data_points: int = 100000,\n",
+ " max_retries: int = 3,\n",
+ " retry_delay: int = 5,\n",
+ " max_concurrent_requests: int = 50\n",
+ ") -> pd.DataFrame:\n",
+ " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
+ " logger = logging.getLogger(__name__)\n",
+ "\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
+ " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
+ " estimated_intervals = total_time_range_ms / pro_interval\n",
+ " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
+ " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
+ " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
+ " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
+ "\n",
+ " all_results = []\n",
+ " semaphore = Semaphore(max_concurrent_requests)\n",
+ " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
+ " data_handler = DataHandler(base_path=\"pqfiles\")\n",
+ "\n",
+ " async def process_batch(vid, variables, time_batch):\n",
+ " async with semaphore:\n",
+ " batch_start_ms = time_batch * time_batch_size_ms\n",
+ " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
+ " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
+ " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
+ "\n",
+ " body = {\n",
+ " **self.body,\n",
+ " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"ReadValueIds\": variables,\n",
+ " \"AggregateName\": agg_name\n",
+ " }\n",
+ "\n",
+ " for attempt in range(max_retries):\n",
+ " try:\n",
+ " content = await request_from_api_async(\n",
+ " client_pool,\n",
+ " method=\"POST\",\n",
+ " endpoint=f\"/values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=self.json_serial),\n",
+ " extended_timeout=True\n",
+ " )\n",
+ " break\n",
+ " except (aiohttp.ClientError, ValidationError) as e:\n",
+ " if attempt < max_retries - 1:\n",
+ " wait_time = retry_delay * (2 ** attempt)\n",
+ " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
+ " await asyncio.sleep(wait_time)\n",
+ " else:\n",
+ " logger.error(f\"Max retries reached. Error: {e}\")\n",
+ " raise RuntimeError(f'Error message {e}')\n",
+ "\n",
+ " self._check_content(content)\n",
+ "\n",
+ " df_result = pd.json_normalize(\n",
+ " content, \n",
+ " record_path=['HistoryReadResults', 'DataValues'], \n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
+ " ['HistoryReadResults', 'NodeId','Id'],\n",
+ " ['HistoryReadResults', 'NodeId','Namespace']]\n",
+ " )\n",
+ " batch_id = f\"{time_batch}_{vid}\"\n",
+ " await data_handler.save_data(batch_id, df_result)\n",
+ " return batch_id\n",
+ "\n",
+ " tasks = [\n",
+ " process_batch(vid,variables, time_batch)\n",
+ " for vid,variables in enumerate(variable_batches)\n",
+ " for time_batch in range(max_time_batches)\n",
+ " ]\n",
+ "\n",
+ " try:\n",
+ " batch_ids = await asyncio.gather(*tasks)\n",
+ " # for batch_id in batch_ids:\n",
+ " # df = await data_handler.get_data(batch_id)\n",
+ " # all_results.append(df)\n",
+ "\n",
+ " # logger.info(\"Combining all batches...\")\n",
+ " # combined_df = pd.concat(all_results, ignore_index=True)\n",
+ " # columns = {\n",
+ " # \"Value.Type\": \"ValueType\",\n",
+ " # \"Value.Body\": \"Value\",\n",
+ " # \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " # \"StatusCode.Code\": \"StatusCode\",\n",
+ " # \"SourceTimestamp\": \"Timestamp\",\n",
+ " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " # }\n",
+ " # return self._process_df(combined_df, columns)\n",
+ " finally:\n",
+ " await client_pool.close_all()\n",
+ " await data_handler.close_all()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical data\n",
+ "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n",
+ " opc_data,\n",
+ " start_time=datetime(2024,6,1,00,00),\n",
+ " end_time=datetime(2024,6,2,00,00),\n",
+ " pro_interval=pro_interval,\n",
+ " agg_name=agg_name,\n",
+ " variable_list=variable_list,\n",
+ " max_data_points=50000,\n",
+ " max_concurrent_requests=50\n",
+ ")\n",
+ "one_day_historical_data"
+ ]
+ },
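+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Optional follow-up sketch (added for illustration): read the Parquet batch files written above back into a single DataFrame. The \"pqfiles\" path and batch_*.parquet naming match the data handler above; the column renaming from the commented-out code could then be applied with opc_data._process_df."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read back the Parquet batches written by the data handler above (illustrative sketch).\n",
+ "import glob\n",
+ "files = glob.glob(\"pqfiles/batch_*.parquet\")\n",
+ "combined = pd.concat((pd.read_parquet(f) for f in files), ignore_index=True) if files else pd.DataFrame()\n",
+ "combined"
+ ]
+ },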
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Stringset data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_historical_aggregated_values(opc_data,\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " agg_name, \n",
+ " variable_list\n",
+ ") -> pd.DataFrame:\n",
+ " \n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]\n",
+ "\n",
+ " body = {\n",
+ " **opc_data.body, \n",
+ " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
+ " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
+ " \"ProcessingInterval\": pro_interval, \n",
+ " \"AggregateName\": agg_name,\n",
+ " \"ReadValueIds\": extended_variables\n",
+ " }\n",
+ " print(body)\n",
+ "\n",
+ " content = request_from_api(\n",
+ " rest_url=opcua_rest_url, \n",
+ " method=\"POST\", \n",
+ " endpoint=\"values/historicalaggregated\", \n",
+ " data=json.dumps(body, default=opc_data.json_serial), \n",
+ " headers=opc_data.headers, \n",
+ " extended_timeout=True\n",
+ " )\n",
+ " print(content)\n",
+ " df_result = pd.json_normalize(\n",
+ " content, \n",
+ " record_path=['HistoryReadResults', 'DataValues'], \n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']\n",
+ " ]\n",
+ " )\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " return opc_data._process_df(df_result, columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n",
+ "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n",
+ "pro_interval=600000\n",
+ "agg_name=\"Average\"\n",
+ "variable_list=string_sets.variables_as_list([\"DCPower\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_historical_aggregated_values(opc_data,\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " agg_name, \n",
+ " variable_list) -> pd.DataFrame:\n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ " batch_size = 100\n",
+ " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n",
+ " \n",
+ " combined_df = pd.DataFrame() \n",
+ " \n",
+ " for batch in batches:\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n",
+ " \n",
+ " body = {\n",
+ " **opc_data.body, \n",
+ " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
+ " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
+ " \"ProcessingInterval\": pro_interval, \n",
+ " \"AggregateName\": agg_name,\n",
+ " \"ReadValueIds\": extended_variables\n",
+ " }\n",
+ " \n",
+ " content = request_from_api(\n",
+ " rest_url=opcua_rest_url, \n",
+ " method=\"POST\", \n",
+ " endpoint=\"values/historicalaggregated\", \n",
+ " data=json.dumps(body, default=opc_data.json_serial), \n",
+ " headers=opc_data.headers, \n",
+ " extended_timeout=True\n",
+ " )\n",
+ " \n",
+ " df_result = pd.json_normalize(\n",
+ " content, \n",
+ " record_path=['HistoryReadResults', 'DataValues'], \n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']]\n",
+ " )\n",
+ " \n",
+ " if combined_df.empty:\n",
+ " combined_df = df_result\n",
+ " else:\n",
+ " combined_df = pd.concat([combined_df, df_result], ignore_index=True)\n",
+ " \n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ " \n",
+ " return opc_data._process_df(combined_df, columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_historical_aggregated_values(opc_data,\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " agg_name, \n",
+ " variable_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import hashlib\n",
+ "import concurrent.futures"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_historical_aggregated_values(opc_data, start_time, end_time, pro_interval, agg_name, variable_list) -> pd.DataFrame:\n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ " batch_size = 150\n",
+ " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n",
+ "\n",
+ " def process_batch(batch):\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n",
+ " body = {\n",
+ " **opc_data.body,\n",
+ " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
+ " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"AggregateName\": agg_name,\n",
+ " \"ReadValueIds\": extended_variables\n",
+ " }\n",
+ " content = request_from_api(\n",
+ " rest_url=opcua_rest_url,\n",
+ " method=\"POST\",\n",
+ " endpoint=\"values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=opc_data.json_serial),\n",
+ " headers=opc_data.headers,\n",
+ " extended_timeout=True\n",
+ " )\n",
+ " return pd.json_normalize(\n",
+ " content,\n",
+ " record_path=['HistoryReadResults', 'DataValues'],\n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId', 'Id'], ['HistoryReadResults', 'NodeId', 'Namespace']]\n",
+ " )\n",
+ "\n",
+ " dataframes = []\n",
+ " with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+ " future_to_batch = {executor.submit(process_batch, batch): batch for batch in batches}\n",
+ " for future in concurrent.futures.as_completed(future_to_batch):\n",
+ " dataframes.append(future.result())\n",
+ "\n",
+ " combined_df = pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()\n",
+ "\n",
+ " columns = {\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
+ " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " }\n",
+ "\n",
+ " return opc_data._process_df(combined_df, columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ "extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "body = {\n",
+ " **opc_data.body,\n",
+ " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
+ " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
+ " \"ProcessingInterval\": pro_interval,\n",
+ " \"AggregateName\": agg_name,\n",
+ " \"ReadValueIds\": extended_variables\n",
+ "}\n",
+ "body"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_historical_aggregated_values(opc_data,\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " agg_name, \n",
+ " variable_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "start_time = datetime.now() - relativedelta(months=1)\n",
+ "end_time = datetime.now()\n",
+ "get_historical_aggregated_values(opc_data,\n",
+ " start_time, \n",
+ " end_time, \n",
+ " pro_interval, \n",
+ " agg_name, \n",
+ " variable_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# History data for 1 day, 10 min aggregate - stringsets\n",
+ "history_agg = opc_data.get_historical_aggregated_values(\n",
+ " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
+ " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
+ " pro_interval=600000,\n",
+ " agg_name=\"Average\",\n",
+ " variable_list=inverters.variables_as_list([\"DCPower\"]),\n",
+ ")\n",
+ "history_agg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import copy\n",
+ "import math\n",
+ "from pydantic import BaseModel, AnyUrl\n",
+ "from datetime import timedelta\n",
+ "import asyncio\n",
+ "import aiohttp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Variables(BaseModel):\n",
+ " \"\"\"Helper class to parse all values api's.\n",
+ " Variables are described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.1/\n",
+ "\n",
+ " Variables:\n",
+ " Id: str - Id of the signal, e.g. SSO.EG-AS.WeatherSymbol\n",
+ " Namespace: int - Namespace on the signal, e.g. 2.\n",
+ " IdType: int - IdTypes described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.3/.\n",
+ " \"\"\"\n",
+ " Id: str\n",
+ " Namespace: int\n",
+ " IdType: int"
+ ]
+ },
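+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal sketch of a Variables entry matching the docstring above; the Id,\n",
+ "# Namespace and IdType values are illustrative assumptions only.\n",
+ "example_variable = Variables(Id=\"SSO.EG-AS.WeatherSymbol\", Namespace=2, IdType=1)\n",
+ "example_variable"
+ ]
+ },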
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n",
+ " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
+ "\n",
+ " # Creating a new variable list to remove pydantic models\n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ "\n",
+ " extended_variables = [\n",
+ " {\n",
+ " \"NodeId\": var,\n",
+ " \"AggregateName\": agg_name,\n",
+ " }\n",
+ " for var in vars\n",
+ " ]\n",
+ " body = copy.deepcopy(opc_data.body)\n",
+ " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"ProcessingInterval\"] = pro_interval\n",
+ " body[\"ReadValueIds\"] = extended_variables\n",
+ " body[\"AggregateName\"] = agg_name\n",
+ "\n",
+ " # Make API request using aiohttp session\n",
+ " async with aiohttp.ClientSession() as session:\n",
+ " async with session.post(\n",
+ " f\"{opcua_rest_url}values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=opc_data.json_serial),\n",
+ " headers=opc_data.headers,\n",
+ " timeout=aiohttp.ClientTimeout(total=None) \n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()\n",
+ "\n",
+ " return content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ "vars1 = vars[0:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "extended_variables = [\n",
+ " {\n",
+ " \"NodeId\": var,\n",
+ " \"AggregateName\": agg_name,\n",
+ " }\n",
+ " for var in vars1\n",
+ "]\n",
+ "len(extended_variables)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "body = copy.deepcopy(opc_data.body)\n",
+ "body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ "body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ "body[\"ProcessingInterval\"] = pro_interval\n",
+ "body[\"ReadValueIds\"] = extended_variables\n",
+ "body[\"AggregateName\"] = agg_name\n",
+ "body"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "f\"{opcua_rest_url}values/historicalaggregated\","
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data=json.dumps(body, default=opc_data.json_serial)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_dict = json.loads(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "read_value_ids = data_dict['ReadValueIds']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "len(read_value_ids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "headers=opc_data.headers\n",
+ "headers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "timeout=aiohttp.ClientTimeout(total=None) \n",
+ "timeout"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async with aiohttp.ClientSession() as session:\n",
+ " async with session.post(\n",
+ " f\"{opcua_rest_url}values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=opc_data.json_serial),\n",
+ " headers=opc_data.headers,\n",
+ " timeout=aiohttp.ClientTimeout(total=None) \n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, batch_size: int) -> list[tuple]:\n",
+ " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n",
+ "\n",
+ " total_time_range = end_time - start_time\n",
+ " pro_interval_seconds = (pro_interval / 1000)\n",
+ " total_data_points = (total_time_range.total_seconds() // pro_interval_seconds) + 1\n",
+ "\n",
+ " total_batches = math.ceil(total_data_points / batch_size)\n",
+ " actual_batch_size = math.ceil(total_data_points / total_batches)\n",
+ "\n",
+ " time_batches = [\n",
+ " (start_time + timedelta(seconds=(i * actual_batch_size * pro_interval_seconds)),\n",
+ " start_time + timedelta(seconds=((i + 1) * actual_batch_size * pro_interval_seconds)) - timedelta(seconds=pro_interval_seconds))\n",
+ " for i in range(total_batches)\n",
+ " ]\n",
+ "\n",
+ " return time_batches"
+ ]
+ },
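+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Quick sanity check (sketch): one day at a 60 s processing interval gives\n",
+ "# 1441 data points per variable, so batch_size=100 should yield 15 time batches.\n",
+ "example_time_batches = generate_time_batches(\n",
+ " datetime.datetime(2023, 11, 13, 0, 0),\n",
+ " datetime.datetime(2023, 11, 14, 0, 0),\n",
+ " pro_interval=60 * 1000,\n",
+ " batch_size=100,\n",
+ ")\n",
+ "len(example_time_batches), example_time_batches[0]"
+ ]
+ },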
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate_variable_batches(variable_list: list[Variables], batch_size: int) -> list[list[Variables]]:\n",
+ " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n",
+ "\n",
+ " variable_batches = [\n",
+ " variable_list[i:i + batch_size] for i in range(0, len(variable_list), batch_size)\n",
+ " ]\n",
+ "\n",
+ " return variable_batches"
+ ]
+ },
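+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Quick check (sketch): seven items with batch_size=3 split into sub-lists of\n",
+ "# length 3, 3 and 1 (the helper works on any list, not just Variables).\n",
+ "[len(batch) for batch in generate_variable_batches(list(range(7)), batch_size=3)]"
+ ]
+ },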
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def process_api_response(opc_data, response: dict) -> pd.DataFrame:\n",
+ " \"\"\"Process the API response and return the result dataframe\"\"\"\n",
+ " \n",
+ " df_result = pd.json_normalize(response, record_path=['HistoryReadResults', 'DataValues'], \n",
+ " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],\n",
+ " ['HistoryReadResults', 'NodeId','Namespace']] )\n",
+ "\n",
+ " for i, row in df_result.iterrows():\n",
+ " if not math.isnan(row[\"Value.Type\"]):\n",
+ " value_type = opc_data._get_value_type(int(row[\"Value.Type\"])).get(\"type\")\n",
+ " df_result.at[i, \"Value.Type\"] = str(value_type)\n",
+ "\n",
+ " df_result.rename(\n",
+ " columns={\n",
+ " \"Value.Type\": \"ValueType\",\n",
+ " \"Value.Body\": \"Value\",\n",
+ " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
+ " \"StatusCode.Code\": \"StatusCode\",\n",
+ " \"SourceTimestamp\": \"Timestamp\",\n",
+ " \"HistoryReadResults.NodeId.IdType\": \"Id\",\n",
+ " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
+ " },\n",
+ " errors=\"raise\",\n",
+ " inplace=True,\n",
+ " )\n",
+ "\n",
+ " return df_result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def get_historical_aggregated_values_async(\n",
+ " opc_data,\n",
+ " start_time: datetime,\n",
+ " end_time: datetime,\n",
+ " pro_interval: int,\n",
+ " agg_name: str,\n",
+ " variable_list: list[Variables],\n",
+ " batch_size: int = 1000\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
+ "\n",
+ " \n",
+ " time_batches = generate_time_batches(start_time, end_time, pro_interval, batch_size)\n",
+ " variable_batches = generate_variable_batches(variable_list, batch_size)\n",
+ "\n",
+ " # Creating tasks for each API request and gathering the results\n",
+ " tasks = []\n",
+ "\n",
+ " for time_batch_start, time_batch_end in time_batches:\n",
+ " for variable_sublist in variable_batches:\n",
+ " task = asyncio.create_task(\n",
+ " make_async_api_request(opc_data, time_batch_start, time_batch_end, pro_interval, agg_name, variable_sublist)\n",
+ " ) \n",
+ " tasks.append(task)\n",
+ " \n",
+ " # Execute all tasks concurrently and gather their results\n",
+ " responses = await asyncio.gather(*tasks)\n",
+ " \n",
+ " # Processing the API responses\n",
+ " result_list = []\n",
+ " for idx, batch_response in enumerate(responses):\n",
+ " \n",
+ " batch_result = process_api_response(opc_data, batch_response)\n",
+ " result_list.append(batch_result)\n",
+ " \n",
+ " result_df = pd.concat(result_list, ignore_index=True)\n",
+ "\n",
+ " return result_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical inverter data in asyncio process\n",
+ "one_days_historic_inverter_data2 = await get_historical_aggregated_values_async(\n",
+ " opc_data,\n",
+ " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
+ " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
+ " pro_interval=60*1000,\n",
+ " agg_name=\"Average\",\n",
+ " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n",
+ " batch_size=100\n",
+ ")\n",
+ "one_days_historic_inverter_data2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def generate_time_chunks(start_time: datetime, end_time: datetime):\n",
+ " \"\"\"Generate time chunks between start_time and end_time, each chunk_duration_minutes long.\"\"\"\n",
+ " delta = timedelta(minutes=60)\n",
+ " current_time = start_time\n",
+ " while current_time < end_time:\n",
+ " chunk_end_time = min(current_time + delta, end_time)\n",
+ " yield (current_time, chunk_end_time)\n",
+ " current_time = chunk_end_time"
+ ]
+ },
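+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: materialise the hourly chunks from the async generator above for a\n",
+ "# short, illustrative three-hour window (relies on IPython's top-level await support).\n",
+ "example_chunks = [\n",
+ " chunk\n",
+ " async for chunk in generate_time_chunks(\n",
+ " datetime.datetime(2023, 11, 13, 0, 0),\n",
+ " datetime.datetime(2023, 11, 13, 3, 0),\n",
+ " )\n",
+ "]\n",
+ "example_chunks"
+ ]
+ },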
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables], max_data_points=500) -> dict:\n",
+ " \"\"\"Make API request for the given time range and variable list, with additional chunking based on data points.\"\"\"\n",
+ "\n",
+ " def chunk_list(lst, n):\n",
+ " \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n",
+ " for i in range(0, len(lst), n):\n",
+ " yield lst[i:i + n]\n",
+ "\n",
+ " async def fetch_data_for_time_period(session, vars_chunk, start, end):\n",
+ " \"\"\"Fetch data for a given time period and chunk of variables.\"\"\"\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n",
+ " body = copy.deepcopy(opc_data.body)\n",
+ " body[\"StartTime\"] = start.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"EndTime\"] = end.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"ProcessingInterval\"] = pro_interval\n",
+ " body[\"ReadValueIds\"] = extended_variables\n",
+ " body[\"AggregateName\"] = agg_name\n",
+ "\n",
+ " async with session.post(\n",
+ " f\"{opcua_rest_url}values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=str),\n",
+ " headers=opc_data.headers,\n",
+ " timeout=aiohttp.ClientTimeout(total=None)\n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " return await response.json()\n",
+ "\n",
+ " # Creating a new variable list to remove pydantic models\n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ " chunk_size = 5 # Chunk size for node IDs\n",
+ " vars_chunks = list(chunk_list(vars, chunk_size))\n",
+ "\n",
+ " all_responses = []\n",
+ " async with aiohttp.ClientSession() as session:\n",
+ " for vars_chunk in vars_chunks:\n",
+ " # Generate time chunks for the given time period\n",
+ " async for start, end in generate_time_chunks(start_time, end_time):\n",
+ " content = await fetch_data_for_time_period(session, vars_chunk, start, end)\n",
+ " all_responses.append(content)\n",
+ " return all_responses"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n",
+ " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
+ "\n",
+ " def chunk_list(lst, n):\n",
+ " for i in range(0, len(lst), n):\n",
+ " yield lst[i:i + n]\n",
+ "\n",
+ " # Creating a new variable list to remove pydantic models\n",
+ " vars = opc_data._get_variable_list_as_list(variable_list)\n",
+ "\n",
+ " chunk_size = 150 \n",
+ " vars_chunks = list(chunk_list(vars, chunk_size))\n",
+ "\n",
+ " all_responses = []\n",
+ " async with aiohttp.ClientSession() as session:\n",
+ " for vars_chunk in vars_chunks:\n",
+ " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n",
+ " body = copy.deepcopy(opc_data.body)\n",
+ " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+ " body[\"ProcessingInterval\"] = pro_interval\n",
+ " body[\"ReadValueIds\"] = extended_variables\n",
+ " body[\"AggregateName\"] = agg_name\n",
+ "\n",
+ " async with session.post(\n",
+ " f\"{opcua_rest_url}values/historicalaggregated\",\n",
+ " data=json.dumps(body, default=str),\n",
+ " headers=opc_data.headers,\n",
+ " timeout=aiohttp.ClientTimeout(total=None)\n",
+ " ) as response:\n",
+ " response.raise_for_status()\n",
+ " content = await response.json()\n",
+ " all_responses.append(content) \n",
+ "\n",
+ " return all_responses"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import datetime, timedelta\n",
+ "from typing import List, Tuple"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate_time_chunks(start_time: datetime, end_time: datetime, interval_hours: int) -> List[Tuple[datetime, datetime]]:\n",
+ " \"\"\"Generate time chunks within the given start and end time with specified interval in hours.\"\"\"\n",
+ " delta = timedelta(hours=interval_hours)\n",
+ " current_time = start_time\n",
+ " chunks = []\n",
+ "\n",
+ " while current_time < end_time:\n",
+ " chunk_end_time = min(current_time + delta, end_time) \n",
+ " chunks.append((current_time, chunk_end_time))\n",
+ " current_time += delta\n",
+ "\n",
+ " return chunks"
+ ]
+ },
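+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: splitting a 24-hour window into 1-hour chunks should give 24 (start, end) pairs;\n",
+ "# datetime here is the class imported above, not the module.\n",
+ "day_chunks = generate_time_chunks(\n",
+ " datetime(2023, 11, 13, 0, 0),\n",
+ " datetime(2023, 11, 14, 0, 0),\n",
+ " interval_hours=1,\n",
+ ")\n",
+ "len(day_chunks), day_chunks[0], day_chunks[-1]"
+ ]
+ },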
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1 day aggregated historical inverter data in asyncio process\n",
+ "one_days_historic_inverter_data2 = await make_async_api_request(\n",
+ " opc_data,\n",
+ " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
+ " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
+ " pro_interval=60*1000,\n",
+ " agg_name=\"Average\",\n",
+ " variable_list=string_sets.variables_as_list([\"DCPower\"])\n",
+ ")\n",
+ "one_days_historic_inverter_data2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.12.1 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "6b866f0bc560289bf4bb2415ae9074243764eb008c10d00a1da29433677418de"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/api_performance_testingapi_performance_testing.ipynb b/notebooks/api_performance_testingapi_performance_testing.ipynb
deleted file mode 100644
index a2b9fa8..0000000
--- a/notebooks/api_performance_testingapi_performance_testing.ipynb
+++ /dev/null
@@ -1,3829 +0,0 @@
-{
- "cells": [
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This notebook explores both model index and opc ua scripts and contain examples of all the functions to make request to model index api and opc ua api servers. "
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Import the required packeages\n",
- "import pandas as pd\n",
- "import os\n",
- "import json\n",
- "import datetime\n",
- "import concurrent.futures\n",
- "from dotenv import load_dotenv\n",
- "from pathlib import Path\n",
- "from dateutil.relativedelta import relativedelta"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Import Scripts"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Import model index functions\n",
- "from pyprediktormapclient.model_index import ModelIndex\n",
- "\n",
- "# Import OPC UA functions\n",
- "from pyprediktormapclient.opc_ua import OPC_UA\n",
- "\n",
- "# Import Analytics Helper\n",
- "from pyprediktormapclient.analytics_helper import AnalyticsHelper\n",
- "\n",
- "# Import \"Dataframer\" Tools\n",
- "from pyprediktormapclient.shared import *\n",
- "\n",
- "# import AUTH_CLIENT\n",
- "from pyprediktormapclient.auth_client import AUTH_CLIENT"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Consider obtaining the envrionment variables from .env file if you are running this locally from source.\n",
- "dotenv_path = Path(\".env\")\n",
- "load_dotenv(dotenv_path=dotenv_path)\n",
- "\n",
- "username = os.environ[\"USERNAME\"]\n",
- "password = os.environ[\"PASSWORD\"]\n",
- "opcua_rest_url = os.environ[\"OPC_UA_REST_URL\"]\n",
- "opcua_server_url = os.environ[\"OPC_UA_SERVER_URL\"]\n",
- "model_index_url = os.environ[\"MODEL_INDEX_URL\"]\n",
- "ory_url = os.environ[\"ORY_URL\"]\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Getting ory bearer token\n",
- "auth_client = AUTH_CLIENT(rest_url=ory_url, username=username, password=password)\n",
- "auth_client.request_new_ory_token()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Connecting to ModelIndex APIs \n",
- "model = ModelIndex(url=model_index_url, auth_client=auth_client, session=auth_client.session)\n",
- "\n",
- "# Listed sites on the model index api server\n",
- "namespaces = model.get_namespace_array()\n",
- "# Types of Objects\n",
- "object_types_json = model.get_object_types()\n",
- "object_types = AnalyticsHelper(object_types_json)\n",
- "namespace_list = object_types.namespaces_as_list(namespaces)\n",
- "\n",
- "# Initate the OPC UA API with a fixed namespace list\n",
- "opc_data = OPC_UA(rest_url=opcua_rest_url, opcua_url=opcua_server_url, namespaces=namespace_list, auth_client=auth_client)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Download data from modelindex api"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Id | \n",
- " Name | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 5:0:1061 | \n",
- " EquipmentEventType | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 5:0:1128 | \n",
- " EnergyAndPowerMeterEventType | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 5:0:1263 | \n",
- " EnergyAndPowerMeterCommLossEventType | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 5:0:1266 | \n",
- " EnergyAndPowerMeterErrorEventType | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5:0:1269 | \n",
- " EnergyAndPowerMeterWarningEventType | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 106 | \n",
- " 5:0:1013 | \n",
- " GridType | \n",
- "
\n",
- " \n",
- " 107 | \n",
- " 5:0:1011 | \n",
- " SectionType | \n",
- "
\n",
- " \n",
- " 108 | \n",
- " 5:0:1009 | \n",
- " SiteType | \n",
- "
\n",
- " \n",
- " 109 | \n",
- " 5:0:1010 | \n",
- " SubSiteType | \n",
- "
\n",
- " \n",
- " 110 | \n",
- " 5:0:1012 | \n",
- " SubstationType | \n",
- "
\n",
- " \n",
- "
\n",
- "
111 rows × 2 columns
\n",
- "
"
- ],
- "text/plain": [
- " Id Name\n",
- "0 5:0:1061 EquipmentEventType\n",
- "1 5:0:1128 EnergyAndPowerMeterEventType\n",
- "2 5:0:1263 EnergyAndPowerMeterCommLossEventType\n",
- "3 5:0:1266 EnergyAndPowerMeterErrorEventType\n",
- "4 5:0:1269 EnergyAndPowerMeterWarningEventType\n",
- ".. ... ...\n",
- "106 5:0:1013 GridType\n",
- "107 5:0:1011 SectionType\n",
- "108 5:0:1009 SiteType\n",
- "109 5:0:1010 SubSiteType\n",
- "110 5:0:1012 SubstationType\n",
- "\n",
- "[111 rows x 2 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Unique types of Objects\n",
- "object_types_unique = object_types.dataframe[[\"Id\", \"Name\"]].drop_duplicates()\n",
- "object_types_unique"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['EG-AS']"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# To get the objects of a type\n",
- "sites_json = model.get_objects_of_type(\"SiteType\")\n",
- "\n",
- "# Send the returned JSON into a normalizer to get Id, Type, Name, Props and Vars as columns\n",
- "sites = AnalyticsHelper(sites_json)\n",
- "sites.list_of_names()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Id | \n",
- " Type | \n",
- " Name | \n",
- " VariableId | \n",
- " VariableName | \n",
- " VariableIdSplit | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Alarms.CommLossPlantDevice | \n",
- " CommLossPlantDevice | \n",
- " {'Id': 'SSO.EG-AS.Alarms.CommLossPlantDevice',... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.PPC.IsCurtailment | \n",
- " PPC.IsCurtailment | \n",
- " {'Id': 'SSO.EG-AS.Signals.PPC.IsCurtailment', ... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.State.IsDay | \n",
- " State.IsDay | \n",
- " {'Id': 'SSO.EG-AS.Signals.State.IsDay', 'Names... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Parameters.ContractDuration | \n",
- " ContractDuration | \n",
- " {'Id': 'SSO.EG-AS.Parameters.ContractDuration'... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Parameters.RegionKey | \n",
- " RegionKey | \n",
- " {'Id': 'SSO.EG-AS.Parameters.RegionKey', 'Name... | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.PPC.SetpointActivePower | \n",
- " PPC.SetpointActivePower | \n",
- " {'Id': 'SSO.EG-AS.Signals.PPC.SetpointActivePo... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.Weather.IrradiationDiffu... | \n",
- " Weather.IrradiationDiffuseHorizontal | \n",
- " {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationD... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.Weather.IrradiationHoriz... | \n",
- " Weather.IrradiationHorizontal | \n",
- " {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationH... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.Weather.IrradiationInCline | \n",
- " Weather.IrradiationInCline | \n",
- " {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationI... | \n",
- "
\n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS | \n",
- " 5:0:1009 | \n",
- " EG-AS | \n",
- " 3:1:SSO.EG-AS.Signals.Status | \n",
- " Status | \n",
- " {'Id': 'SSO.EG-AS.Signals.Status', 'Namespace'... | \n",
- "
\n",
- " \n",
- "
\n",
- "
118 rows × 6 columns
\n",
- "
"
- ],
- "text/plain": [
- " Id Type Name \\\n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- ".. ... ... ... \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "0 3:1:SSO.EG-AS 5:0:1009 EG-AS \n",
- "\n",
- " VariableId \\\n",
- "0 3:1:SSO.EG-AS.Alarms.CommLossPlantDevice \n",
- "0 3:1:SSO.EG-AS.Signals.PPC.IsCurtailment \n",
- "0 3:1:SSO.EG-AS.Signals.State.IsDay \n",
- "0 3:1:SSO.EG-AS.Parameters.ContractDuration \n",
- "0 3:1:SSO.EG-AS.Parameters.RegionKey \n",
- ".. ... \n",
- "0 3:1:SSO.EG-AS.Signals.PPC.SetpointActivePower \n",
- "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationDiffu... \n",
- "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationHoriz... \n",
- "0 3:1:SSO.EG-AS.Signals.Weather.IrradiationInCline \n",
- "0 3:1:SSO.EG-AS.Signals.Status \n",
- "\n",
- " VariableName \\\n",
- "0 CommLossPlantDevice \n",
- "0 PPC.IsCurtailment \n",
- "0 State.IsDay \n",
- "0 ContractDuration \n",
- "0 RegionKey \n",
- ".. ... \n",
- "0 PPC.SetpointActivePower \n",
- "0 Weather.IrradiationDiffuseHorizontal \n",
- "0 Weather.IrradiationHorizontal \n",
- "0 Weather.IrradiationInCline \n",
- "0 Status \n",
- "\n",
- " VariableIdSplit \n",
- "0 {'Id': 'SSO.EG-AS.Alarms.CommLossPlantDevice',... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.PPC.IsCurtailment', ... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.State.IsDay', 'Names... \n",
- "0 {'Id': 'SSO.EG-AS.Parameters.ContractDuration'... \n",
- "0 {'Id': 'SSO.EG-AS.Parameters.RegionKey', 'Name... \n",
- ".. ... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.PPC.SetpointActivePo... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationD... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationH... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.Weather.IrradiationI... \n",
- "0 {'Id': 'SSO.EG-AS.Signals.Status', 'Namespace'... \n",
- "\n",
- "[118 rows x 6 columns]"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Analytics helper\n",
- "sites.variables_as_dataframe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['3:1:SSO.EG-AS']"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "sites.list_of_ids()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'3:1:SSO.EG-AS'"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Selecting the second site\n",
- "first_site_id = sites.list_of_ids()[0]\n",
- "# first_site_id = '14:1:BE.DK-ADU'\n",
- "first_site_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Id | \n",
- " Name | \n",
- " Type | \n",
- " Props | \n",
- " Vars | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1 | \n",
- " EG-AS-TS01-I01-SM10-CH1 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '1'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH10 | \n",
- " EG-AS-TS01-I01-SM10-CH10 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '10'}, ... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH2 | \n",
- " EG-AS-TS01-I01-SM10-CH2 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '2'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH3 | \n",
- " EG-AS-TS01-I01-SM10-CH3 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '3'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH4 | \n",
- " EG-AS-TS01-I01-SM10-CH4 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '4'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 2933 | \n",
- " 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH5 | \n",
- " EG-AS-TS11-I22-SM9-CH5 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '5'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 2934 | \n",
- " 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH6 | \n",
- " EG-AS-TS11-I22-SM9-CH6 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '6'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 2935 | \n",
- " 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH7 | \n",
- " EG-AS-TS11-I22-SM9-CH7 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '7'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 2936 | \n",
- " 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH8 | \n",
- " EG-AS-TS11-I22-SM9-CH8 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '8'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- " 2937 | \n",
- " 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9 | \n",
- " EG-AS-TS11-I22-SM9-CH9 | \n",
- " StringSetType | \n",
- " [{'DisplayName': 'ChannelNo', 'Value': '9'}, {... | \n",
- " [{'DisplayName': 'StringDisconnected', 'Id': '... | \n",
- "
\n",
- " \n",
- "
\n",
- "
2938 rows × 5 columns
\n",
- "
"
- ],
- "text/plain": [
- " Id Name \\\n",
- "0 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1 EG-AS-TS01-I01-SM10-CH1 \n",
- "1 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH10 EG-AS-TS01-I01-SM10-CH10 \n",
- "2 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH2 EG-AS-TS01-I01-SM10-CH2 \n",
- "3 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH3 EG-AS-TS01-I01-SM10-CH3 \n",
- "4 3:1:SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH4 EG-AS-TS01-I01-SM10-CH4 \n",
- "... ... ... \n",
- "2933 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH5 EG-AS-TS11-I22-SM9-CH5 \n",
- "2934 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH6 EG-AS-TS11-I22-SM9-CH6 \n",
- "2935 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH7 EG-AS-TS11-I22-SM9-CH7 \n",
- "2936 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH8 EG-AS-TS11-I22-SM9-CH8 \n",
- "2937 3:1:SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9 EG-AS-TS11-I22-SM9-CH9 \n",
- "\n",
- " Type Props \\\n",
- "0 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '1'}, {... \n",
- "1 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '10'}, ... \n",
- "2 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '2'}, {... \n",
- "3 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '3'}, {... \n",
- "4 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '4'}, {... \n",
- "... ... ... \n",
- "2933 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '5'}, {... \n",
- "2934 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '6'}, {... \n",
- "2935 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '7'}, {... \n",
- "2936 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '8'}, {... \n",
- "2937 StringSetType [{'DisplayName': 'ChannelNo', 'Value': '9'}, {... \n",
- "\n",
- " Vars \n",
- "0 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "1 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "2 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "3 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "4 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "... ... \n",
- "2933 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "2934 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "2935 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "2936 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "2937 [{'DisplayName': 'StringDisconnected', 'Id': '... \n",
- "\n",
- "[2938 rows x 5 columns]"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Get all stringsets for one park\n",
- "string_sets_for_first_park_as_json = model.get_object_descendants(\n",
- " \"StringSetType\", [first_site_id], \"PV_Assets\"\n",
- ")\n",
- "string_sets = AnalyticsHelper(string_sets_for_first_park_as_json)\n",
- "string_sets.dataframe"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Query Parameters"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [],
- "source": [
- "variable_list =string_sets.variables_as_list([\"DCPower\"])\n",
- "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n",
- "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n",
- "pro_interval=60*1000\n",
- "agg_name=\"Average\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "import logging\n",
- "from datetime import timedelta"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "import aiohttp\n",
- "from aiohttp import ClientSession\n",
- "from asyncio import Semaphore\n",
- "\n",
- "async def get_historical_aggregated_values_batch_time_vars_async(self, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list, max_data_points: int = 10000, max_retries: int = 3, retry_delay: int = 5, max_concurrent_requests: int = 10) -> pd.DataFrame:\n",
- " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- " logger = logging.getLogger(__name__)\n",
- " # Convert variable list to the required format\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- "\n",
- " # Calculate total time range in milliseconds\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- "\n",
- " # Estimate the number of intervals based on the processing interval\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- "\n",
- " # Calculate the maximum number of variables that can be processed in each batch\n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- "\n",
- " # Split variables into batches\n",
- " variable_batches = [\n",
- " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
- "\n",
- " # Calculate the number of time batches needed based on max data points and estimated intervals\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- "\n",
- " # Calculate time batch size in milliseconds\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- "\n",
- " async def process_batch(variables, time_batch):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " async with ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{self.rest_url}values/historicalaggregated\",\n",
- " json=body,\n",
- " headers=self.headers\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " break\n",
- " except aiohttp.ClientError as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " self._check_content(content)\n",
- "\n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
- " ['HistoryReadResults', 'NodeId','Id'],\n",
- " ['HistoryReadResults', 'NodeId','Namespace']]\n",
- " )\n",
- " return df_result\n",
- "\n",
- " tasks = [\n",
- " process_batch(variables, time_batch)\n",
- " for variables in variable_batches\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " results = await asyncio.gather(*tasks)\n",
- " all_results.extend(results)\n",
- "\n",
- " # Combine all batch results into a single DataFrame\n",
- " logger.info(\"Combining all batches...\")\n",
- " combined_df = pd.concat(all_results, ignore_index=True)\n",
- " # Process and return the combined DataFrame\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return self._process_df(combined_df, columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-07-22 14:10:51,694 - INFO - Combining all batches...\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Timestamp | \n",
- " ValueType | \n",
- " Value | \n",
- " StatusCode | \n",
- " StatusSymbol | \n",
- " IdType | \n",
- " Id | \n",
- " Namespace | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2024-06-22T14:07:19.691118Z | \n",
- " Double | \n",
- " 13861.390625 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2024-06-22T14:08:19.691118Z | \n",
- " Double | \n",
- " 13998.080078 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2024-06-22T14:09:19.691118Z | \n",
- " Double | \n",
- " 13927.273438 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2024-06-22T14:10:19.691118Z | \n",
- " Double | \n",
- " 13916.458984 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2024-06-22T14:11:19.691118Z | \n",
- " Double | \n",
- " 13997.431641 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 4230715 | \n",
- " 2024-06-23T14:02:19.691118Z | \n",
- " Double | \n",
- " 13705.159405 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230716 | \n",
- " 2024-06-23T14:03:19.691118Z | \n",
- " Double | \n",
- " 13593.904297 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230717 | \n",
- " 2024-06-23T14:04:19.691118Z | \n",
- " Double | \n",
- " 13629.435547 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230718 | \n",
- " 2024-06-23T14:05:19.691118Z | \n",
- " Double | \n",
- " 13530.140625 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230719 | \n",
- " 2024-06-23T14:06:19.691118Z | \n",
- " Double | \n",
- " 13501.579102 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- "
\n",
- "
4230720 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " Timestamp ValueType Value StatusCode \\\n",
- "0 2024-06-22T14:07:19.691118Z Double 13861.390625 1 \n",
- "1 2024-06-22T14:08:19.691118Z Double 13998.080078 1 \n",
- "2 2024-06-22T14:09:19.691118Z Double 13927.273438 1 \n",
- "3 2024-06-22T14:10:19.691118Z Double 13916.458984 1 \n",
- "4 2024-06-22T14:11:19.691118Z Double 13997.431641 1 \n",
- "... ... ... ... ... \n",
- "4230715 2024-06-23T14:02:19.691118Z Double 13705.159405 1 \n",
- "4230716 2024-06-23T14:03:19.691118Z Double 13593.904297 1 \n",
- "4230717 2024-06-23T14:04:19.691118Z Double 13629.435547 1 \n",
- "4230718 2024-06-23T14:05:19.691118Z Double 13530.140625 1 \n",
- "4230719 2024-06-23T14:06:19.691118Z Double 13501.579102 1 \n",
- "\n",
- " StatusSymbol IdType \\\n",
- "0 Good 1 \n",
- "1 Good 1 \n",
- "2 Good 1 \n",
- "3 Good 1 \n",
- "4 Good 1 \n",
- "... ... ... \n",
- "4230715 Good 1 \n",
- "4230716 Good 1 \n",
- "4230717 Good 1 \n",
- "4230718 Good 1 \n",
- "4230719 Good 1 \n",
- "\n",
- " Id Namespace \n",
- "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "... ... ... \n",
- "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "\n",
- "[4230720 rows x 8 columns]"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n",
- " opc_data,\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=10000,\n",
- " max_concurrent_requests=40\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Batching with Async Refactoring"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "import logging\n",
- "import asyncio\n",
- "import aiohttp\n",
- "from aiohttp import ClientSession\n",
- "from asyncio import Semaphore\n",
- "from datetime import timedelta\n",
- "from typing import Dict, List, Tuple\n",
- "\n",
- "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- "logger = logging.getLogger(__name__)\n",
- "\n",
- "async def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, max_data_points: int) -> List[tuple]:\n",
- " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n",
- "\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- " \n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- "\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " return total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches\n",
- "\n",
- "def generate_variable_batches(start_time, end_time, pro_interval, variable_list: List[Dict[str, str]], max_data_points) -> List:\n",
- " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n",
- "\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- " max_variables_per_batch = generate_time_batches(start_time, end_time, pro_interval, max_data_points)[1]\n",
- "\n",
- " variable_batches = [\n",
- " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n",
- " ]\n",
- "\n",
- " return variable_batches\n",
- "\n",
- "def _prepare_body(\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " variable_list: List[Dict[str, str]], \n",
- " agg_name: str,\n",
- " ) -> Dict:\n",
- " \"\"\"\n",
- " Prepare the request body for the API call.\n",
- " \"\"\"\n",
- " total_time_range_ms, max_variables_per_batch, time_batch_size_ms, max_time_batches = generate_time_batches(\n",
- " start_time, end_time, pro_interval, 10000)\n",
- "\n",
- " for time_batch in range(max_time_batches):\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " variable_batches = generate_variable_batches(variable_list)\n",
- "\n",
- " for variables in variable_batches:\n",
- " body = {\n",
- " **opc_data.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- " return body\n",
- " \n",
- "def process_batch(content: dict) -> pd.DataFrame:\n",
- " \"\"\" Process individual batch of data \"\"\"\n",
- " \n",
- " df_list = []\n",
- " for item in content[\"HistoryReadResults\"]:\n",
- " df = pd.json_normalize(item[\"DataValues\"])\n",
- " for key, value in item[\"NodeId\"].items():\n",
- " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
- " df_list.append(df)\n",
- " \n",
- " if df_list:\n",
- " df_result = pd.concat(df_list)\n",
- " df_result.reset_index(inplace=True, drop=True)\n",
- " return df_result\n",
- " else:\n",
- " return pd.DataFrame()\n",
- " \n",
- "async def make_async_api_request(opc_data, start_time:datetime, end_time:datetime,\n",
- " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n",
- " semaphore, max_retries: int = 3, retry_delay: int = 5) -> dict:\n",
- " \n",
- " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
- "\n",
- " async with semaphore:\n",
- " body = _prepare_body(\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " variable_list,\n",
- " agg_name\n",
- " )\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " async with ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{opcua_rest_url}values/historicalaggregated\",\n",
- " json=body,\n",
- " headers=opc_data.headers\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " break\n",
- " except aiohttp.ClientError as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " opc_data._check_content(content)\n",
- "\n",
- " df_result = process_batch(content)\n",
- " return df_result\n",
- " \n",
- "async def process_api_response(opc_data, start_time:datetime, end_time:datetime,\n",
- " pro_interval: int, variable_list: List[Dict[str, str]], agg_name: str,\n",
- " max_concurrent_requests: int = 10) -> pd.DataFrame:\n",
- " \"\"\" Process API response asynchronously and return the result dataframe \"\"\"\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- "\n",
- " tasks = [\n",
- " make_async_api_request(opc_data, start_time, end_time, pro_interval, variable_list, agg_name, semaphore)\n",
- " ]\n",
- " results = await asyncio.gather(*tasks)\n",
- " all_results.extend(results)\n",
- " \n",
- " if all_results:\n",
- " combined_df = pd.concat(all_results, ignore_index=True)\n",
- " combined_df.reset_index(inplace=True, drop=True)\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return opc_data._process_df(combined_df, columns)\n",
- " else:\n",
- " return pd.DataFrame()\n",
- " \n",
- "async def get_historical_aggregated_values_async(\n",
- " opc_data,\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " variable_list: List[Dict[str, str]],\n",
- " agg_name: str,\n",
- ") -> pd.DataFrame:\n",
- " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
- "\n",
- " \n",
- " result_df = await process_api_response(opc_data, start_time, end_time, pro_interval, variable_list, agg_name)\n",
- "\n",
- " return result_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical inverter data in asyncio process\n",
- "one_days_historic_inverter_data2 = await get_historical_aggregated_values_batch_time_vars_async(\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=60*1000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=string_sets.variables_as_list([\"DCPower\"])\n",
- ")\n",
- "one_days_historic_inverter_data2"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Batching with Async"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "import logging\n",
- "import asyncio\n",
- "import aiohttp\n",
- "from aiohttp import ClientSession\n",
- "from asyncio import Semaphore\n",
- "from datetime import timedelta\n",
- "from typing import List, Dict, Tuple\n",
- "\n",
- "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- "logger = logging.getLogger(__name__)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def _fetch_data(self, endpoint: str, body: Dict, max_retries:int, retry_delay:int) -> pd.DataFrame:\n",
- " \"\"\"\n",
- " Fetch data from the API and return it as a DataFrame.\n",
- " \"\"\"\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " async with ClientSession() as session:\n",
- " async with session.post(\n",
- " rest_url=self.rest_url,\n",
- " endpoint=endpoint,\n",
- " json=body,\n",
- " headers=opc_data.headers\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " except aiohttp.ClientError as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- " opc_data._check_content(content)\n",
- "\n",
- " df_list = []\n",
- " for item in content[\"HistoryReadResults\"]:\n",
- " df = pd.json_normalize(item[\"DataValues\"])\n",
- " for key, value in item[\"NodeId\"].items():\n",
- " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
- " df_list.append(df)\n",
- " \n",
- " if df_list:\n",
- " df_result = pd.concat(df_list)\n",
- " df_result.reset_index(inplace=True, drop=True)\n",
- " return df_result\n",
- " \n",
- " return df_result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def process_batch(self, semaphore, variables, time_batch, time_batch_size_ms, total_time_range_ms, max_retries, retry_delay):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " async with ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{self.rest_url}values/historicalaggregated\",\n",
- " json=body,\n",
- " headers=opc_data.headers\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " break\n",
- " except aiohttp.ClientError as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " opc_data._check_content(content)\n",
- "\n",
- " df_list = []\n",
- " for item in content[\"HistoryReadResults\"]:\n",
- " df = pd.json_normalize(item[\"DataValues\"])\n",
- " for key, value in item[\"NodeId\"].items():\n",
- " df[f\"HistoryReadResults.NodeId.{key}\"] = value\n",
- " df_list.append(df)\n",
- " \n",
- " if df_list:\n",
- " df_result = pd.concat(df_list)\n",
- " df_result.reset_index(inplace=True, drop=True)\n",
- " return df_result\n",
- " \n",
- " return df_result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def get_historical_aggregated_values_batch_time_vars_async1(self, start_time: datetime, end_time: datetime, pro_interval: int, \n",
- " agg_name: str, variable_list: list,max_data_points: int = 10000,\n",
- " max_retries: int = 3, retry_delay: int = 5, \n",
- " max_concurrent_requests: int = 10) -> pd.DataFrame:\n",
- " \n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- " \n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- " variable_batches = [\n",
- " extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)\n",
- " ]\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- "\n",
- " tasks = [\n",
- " process_batch(self,semaphore, variables, time_batch, time_batch_size_ms, total_time_range_ms, max_retries, retry_delay)\n",
- " for variables in variable_batches\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " results = await asyncio.gather(*tasks)\n",
- " all_results.extend(results)\n",
- "\n",
- " logger.info(\"Combining all batches...\")\n",
- " combined_df = pd.concat(all_results, ignore_index=True)\n",
- " \n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return self._process_df(combined_df, columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-07-22 12:50:34,233 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-22 12:51:01,813 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-22 12:51:51,706 - INFO - Combining all batches...\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Timestamp | \n",
- " ValueType | \n",
- " Value | \n",
- " StatusCode | \n",
- " StatusSymbol | \n",
- " IdType | \n",
- " Id | \n",
- " Namespace | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2024-06-22T12:36:10.687988Z | \n",
- " Double | \n",
- " 17479.763672 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2024-06-22T12:37:10.687988Z | \n",
- " Double | \n",
- " 17516.169922 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2024-06-22T12:38:10.687988Z | \n",
- " Double | \n",
- " 17444.314453 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2024-06-22T12:39:10.687988Z | \n",
- " Double | \n",
- " 17566.621094 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2024-06-22T12:40:10.687988Z | \n",
- " Double | \n",
- " 17619.875000 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 4230715 | \n",
- " 2024-06-23T12:31:10.687988Z | \n",
- " Double | \n",
- " 15664.324219 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230716 | \n",
- " 2024-06-23T12:32:10.687988Z | \n",
- " Double | \n",
- " 15755.339844 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230717 | \n",
- " 2024-06-23T12:33:10.687988Z | \n",
- " Double | \n",
- " 15869.092773 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230718 | \n",
- " 2024-06-23T12:34:10.687988Z | \n",
- " Double | \n",
- " 15772.485352 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230719 | \n",
- " 2024-06-23T12:35:10.687988Z | \n",
- " Double | \n",
- " 15702.324219 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- "
\n",
- "
4230720 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " Timestamp ValueType Value StatusCode \\\n",
- "0 2024-06-22T12:36:10.687988Z Double 17479.763672 1 \n",
- "1 2024-06-22T12:37:10.687988Z Double 17516.169922 1 \n",
- "2 2024-06-22T12:38:10.687988Z Double 17444.314453 1 \n",
- "3 2024-06-22T12:39:10.687988Z Double 17566.621094 1 \n",
- "4 2024-06-22T12:40:10.687988Z Double 17619.875000 1 \n",
- "... ... ... ... ... \n",
- "4230715 2024-06-23T12:31:10.687988Z Double 15664.324219 1 \n",
- "4230716 2024-06-23T12:32:10.687988Z Double 15755.339844 1 \n",
- "4230717 2024-06-23T12:33:10.687988Z Double 15869.092773 1 \n",
- "4230718 2024-06-23T12:34:10.687988Z Double 15772.485352 1 \n",
- "4230719 2024-06-23T12:35:10.687988Z Double 15702.324219 1 \n",
- "\n",
- " StatusSymbol IdType \\\n",
- "0 Good 1 \n",
- "1 Good 1 \n",
- "2 Good 1 \n",
- "3 Good 1 \n",
- "4 Good 1 \n",
- "... ... ... \n",
- "4230715 Good 1 \n",
- "4230716 Good 1 \n",
- "4230717 Good 1 \n",
- "4230718 Good 1 \n",
- "4230719 Good 1 \n",
- "\n",
- " Id Namespace \n",
- "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "... ... ... \n",
- "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "\n",
- "[4230720 rows x 8 columns]"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async1(\n",
- " opc_data,\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=10000,\n",
- " max_concurrent_requests=40\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "import aiohttp\n",
- "from aiohttp import ClientSession\n",
- "from asyncio import Semaphore\n",
- "\n",
- "async def get_historical_aggregated_values_batch_time_vars_async(self, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list, max_data_points: int = 10000, max_retries: int = 3, retry_delay: int = 5, max_concurrent_requests: int = 10) -> pd.DataFrame:\n",
- " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- " logger = logging.getLogger(__name__)\n",
- " # Convert variable list to the required format\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- "\n",
- " # Calculate total time range in milliseconds\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- "\n",
- " # Estimate the number of intervals based on the processing interval\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- "\n",
- " # Calculate the maximum number of variables that can be processed in each batch\n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- "\n",
- " # Split variables into batches\n",
- " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
- "\n",
- " # Calculate the number of time batches needed based on max data points and estimated intervals\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- "\n",
- " # Calculate time batch size in milliseconds\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- "\n",
- " async def process_batch(variables, time_batch):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " async with ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{self.rest_url}values/historicalaggregated\",\n",
- " json=body,\n",
- " headers=self.headers\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " break\n",
- " except aiohttp.ClientError as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " self._check_content(content)\n",
- "\n",
- " df_result = process_batch_api_response(content)\n",
- " return df_result\n",
- "\n",
- " tasks = [\n",
- " process_batch(variables, time_batch)\n",
- " for variables in variable_batches\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " results = await asyncio.gather(*tasks)\n",
- " all_results.extend(results)\n",
- "\n",
- " # Combine all batch results into a single DataFrame\n",
- " logger.info(\"Combining all batches...\")\n",
- " combined_df = pd.concat(all_results, ignore_index=True)\n",
- " # Process and return the combined DataFrame\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return self._process_df(combined_df, columns)"
- ]
- },
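- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A quick sanity check on the batch-sizing arithmetic above. The numbers below are assumptions chosen to match the 1-day, 60 s runs in this notebook (4,230,720 result rows / 1,440 intervals ≈ 2,938 node IDs); adjust them to your own query."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
-    "# Worked example of the batch sizing above (illustrative values only)\n",
-    "total_time_range_ms = 24 * 60 * 60 * 1000   # one day = 86_400_000 ms\n",
-    "pro_interval = 60 * 1000                    # 60 s processing interval\n",
-    "max_data_points = 10000\n",
-    "n_variables = 2938\n",
-    "\n",
-    "estimated_intervals = total_time_range_ms / pro_interval                       # 1440 intervals\n",
-    "max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))   # 6 variables per request\n",
-    "max_time_batches = max(1, int(estimated_intervals / max_data_points))          # 1 time batch\n",
-    "n_requests = -(-n_variables // max_variables_per_batch) * max_time_batches     # ceil(2938 / 6) = 490 requests\n",
-    "\n",
-    "print(estimated_intervals, max_variables_per_batch, max_time_batches, n_requests)"
- ]
- },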
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n",
- " opc_data,\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=10000,\n",
- " max_concurrent_requests=40\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Async with ClientPool"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "import aiohttp\n",
- "from aiohttp import ClientSession\n",
- "from asyncio import Semaphore\n",
- "from typing import List, Dict, Any\n",
- "from datetime import datetime, timedelta\n",
- "import pandas as pd\n",
- "import logging\n",
- "from pydantic import AnyUrl, ValidationError"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "class ClientPool:\n",
- " def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]):\n",
- " self.clients = asyncio.Queue()\n",
- " for _ in range(num_clients):\n",
- " self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers))\n",
- " self.num_clients = num_clients\n",
- "\n",
- " async def get_client(self):\n",
- " client = await self.clients.get()\n",
- " return client\n",
- "\n",
- " async def release_client(self, client):\n",
- " await self.clients.put(client)\n",
- "\n",
- " async def close_all(self):\n",
- " while not self.clients.empty():\n",
- " client = await self.clients.get()\n",
- " await client.close()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def request_from_api_async(\n",
- " client_pool: ClientPool,\n",
- " method: str,\n",
- " endpoint: str,\n",
- " data: str = None,\n",
- " params: Dict[str, Any] = None,\n",
- " extended_timeout: bool = False,\n",
- ") -> Dict[str, Any]:\n",
- " timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)\n",
- " client = await client_pool.get_client()\n",
- " \n",
- " try:\n",
- " if method == \"GET\":\n",
- " async with client.get(endpoint, params=params, timeout=timeout) as response:\n",
- " response.raise_for_status()\n",
- " if 'application/json' in response.headers.get('Content-Type', ''):\n",
- " return await response.json()\n",
- " else:\n",
- " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n",
- " elif method == \"POST\":\n",
- " async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:\n",
- " response.raise_for_status()\n",
- " if 'application/json' in response.headers.get('Content-Type', ''):\n",
- " return await response.json()\n",
- " else:\n",
- " return {\"error\": \"Non-JSON response\", \"content\": await response.text()}\n",
- " else:\n",
- " raise ValidationError(\"Unsupported method\")\n",
- " finally:\n",
- " await client_pool.release_client(client)"
- ]
- },
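- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A minimal smoke test of `ClientPool` together with `request_from_api_async`. The base URL, headers and `/status` endpoint below are placeholders, not the notebook's real configuration; the real runs further down use `opc_data.rest_url` and `opc_data.headers`. Note that the helper releases the client back to the pool in its `finally` clause."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
-    "# Hypothetical smoke test of the pooled request helper; URL, headers and endpoint are placeholders\n",
-    "pool = ClientPool(num_clients=2, rest_url=\"https://example.com\", headers={\"Accept\": \"application/json\"})\n",
-    "try:\n",
-    "    result = await request_from_api_async(pool, method=\"GET\", endpoint=\"/status\")\n",
-    "    print(result)\n",
-    "finally:\n",
-    "    await pool.close_all()"
- ]
- },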
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def get_historical_aggregated_values_batch_time_vars_async(\n",
- " self,\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " agg_name: str,\n",
- " variable_list: List[str],\n",
- " max_data_points: int = 100000,\n",
- " max_retries: int = 3,\n",
- " retry_delay: int = 5,\n",
- " max_concurrent_requests: int = 50\n",
- ") -> pd.DataFrame:\n",
- " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- " logger = logging.getLogger(__name__)\n",
- "\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
- "\n",
- " async def process_batch(variables, time_batch):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " content = await request_from_api_async(\n",
- " client_pool,\n",
- " method=\"POST\",\n",
- " endpoint=f\"/values/historicalaggregated\",\n",
- " data=json.dumps(body, default=self.json_serial),\n",
- " extended_timeout=True\n",
- " )\n",
- " break\n",
- " except (aiohttp.ClientError, ValidationError) as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " self._check_content(content)\n",
- "\n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
- " ['HistoryReadResults', 'NodeId','Id'],\n",
- " ['HistoryReadResults', 'NodeId','Namespace']]\n",
- " )\n",
- " return df_result\n",
- "\n",
- " tasks = [\n",
- " process_batch(variables, time_batch)\n",
- " for variables in variable_batches\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " try:\n",
- " results = await asyncio.gather(*tasks)\n",
- " all_results.extend(results)\n",
- "\n",
- " logger.info(\"Combining all batches...\")\n",
- " combined_df = pd.concat(all_results, ignore_index=True)\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return self._process_df(combined_df, columns)\n",
- " finally:\n",
- " await client_pool.close_all()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-07-22 14:13:58,459 - INFO - Combining all batches...\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Timestamp | \n",
- " ValueType | \n",
- " Value | \n",
- " StatusCode | \n",
- " StatusSymbol | \n",
- " IdType | \n",
- " Id | \n",
- " Namespace | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 2024-06-22T14:07:19.691118Z | \n",
- " Double | \n",
- " 13861.390625 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2024-06-22T14:08:19.691118Z | \n",
- " Double | \n",
- " 13998.080078 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2024-06-22T14:09:19.691118Z | \n",
- " Double | \n",
- " 13927.273438 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2024-06-22T14:10:19.691118Z | \n",
- " Double | \n",
- " 13916.458984 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 2024-06-22T14:11:19.691118Z | \n",
- " Double | \n",
- " 13997.431641 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 4230715 | \n",
- " 2024-06-23T14:02:19.691118Z | \n",
- " Double | \n",
- " 13705.159405 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230716 | \n",
- " 2024-06-23T14:03:19.691118Z | \n",
- " Double | \n",
- " 13593.904297 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230717 | \n",
- " 2024-06-23T14:04:19.691118Z | \n",
- " Double | \n",
- " 13629.435547 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230718 | \n",
- " 2024-06-23T14:05:19.691118Z | \n",
- " Double | \n",
- " 13530.140625 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 4230719 | \n",
- " 2024-06-23T14:06:19.691118Z | \n",
- " Double | \n",
- " 13501.579102 | \n",
- " 1 | \n",
- " Good | \n",
- " 1 | \n",
- " SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower | \n",
- " 3 | \n",
- "
\n",
- " \n",
- "
\n",
- "
4230720 rows × 8 columns
\n",
- "
"
- ],
- "text/plain": [
- " Timestamp ValueType Value StatusCode \\\n",
- "0 2024-06-22T14:07:19.691118Z Double 13861.390625 1 \n",
- "1 2024-06-22T14:08:19.691118Z Double 13998.080078 1 \n",
- "2 2024-06-22T14:09:19.691118Z Double 13927.273438 1 \n",
- "3 2024-06-22T14:10:19.691118Z Double 13916.458984 1 \n",
- "4 2024-06-22T14:11:19.691118Z Double 13997.431641 1 \n",
- "... ... ... ... ... \n",
- "4230715 2024-06-23T14:02:19.691118Z Double 13705.159405 1 \n",
- "4230716 2024-06-23T14:03:19.691118Z Double 13593.904297 1 \n",
- "4230717 2024-06-23T14:04:19.691118Z Double 13629.435547 1 \n",
- "4230718 2024-06-23T14:05:19.691118Z Double 13530.140625 1 \n",
- "4230719 2024-06-23T14:06:19.691118Z Double 13501.579102 1 \n",
- "\n",
- " StatusSymbol IdType \\\n",
- "0 Good 1 \n",
- "1 Good 1 \n",
- "2 Good 1 \n",
- "3 Good 1 \n",
- "4 Good 1 \n",
- "... ... ... \n",
- "4230715 Good 1 \n",
- "4230716 Good 1 \n",
- "4230717 Good 1 \n",
- "4230718 Good 1 \n",
- "4230719 Good 1 \n",
- "\n",
- " Id Namespace \n",
- "0 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "1 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "2 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "3 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "4 SSO.EG-AS.S1.Z5.TS01.I01.SM10.CH1.Signals.DCPower 3 \n",
- "... ... ... \n",
- "4230715 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230716 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230717 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230718 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "4230719 SSO.EG-AS.S1.Z1.TS11.I22.SM9.CH9.Signals.DCPower 3 \n",
- "\n",
- "[4230720 rows x 8 columns]"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_async(\n",
- " opc_data,\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=10000,\n",
- " max_concurrent_requests=50\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Async with Data Handler"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "import aiohttp\n",
- "import pandas as pd\n",
- "import sqlite3\n",
- "import tempfile\n",
- "import os\n",
- "import json\n",
- "from asyncio import Semaphore\n",
- "from typing import List, Dict, Any\n",
- "from datetime import datetime, timedelta\n",
- "import logging\n",
- "import pyarrow as pa\n",
- "import pyarrow.parquet as pq"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "class DataHandler:\n",
- " def __init__(self, max_memory_rows=10000):\n",
- " self.max_memory_rows = max_memory_rows\n",
- " self.temp_dir = tempfile.mkdtemp()\n",
- " self.db_path = os.path.join(self.temp_dir, 'temp_data.db')\n",
- " self.conn = sqlite3.connect(self.db_path)\n",
- " self.conn.execute('''CREATE TABLE IF NOT EXISTS temp_data\n",
- " (id INTEGER PRIMARY KEY AUTOINCREMENT,\n",
- " batch_id TEXT,\n",
- " data TEXT)''')\n",
- "\n",
- " async def save_data(self, batch_id: str, data: pd.DataFrame):\n",
- " if len(data) <= self.max_memory_rows:\n",
- " # Store small datasets directly in SQLite\n",
- " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n",
- " (batch_id, data.to_json()))\n",
- " else:\n",
- " # Stream larger datasets to Parquet file\n",
- " file_path = os.path.join(self.temp_dir, f\"batch_{batch_id}.parquet\")\n",
- " table = pa.Table.from_pandas(data)\n",
- " pq.write_table(table, file_path)\n",
- " \n",
- " # Store file path in SQLite\n",
- " self.conn.execute(\"INSERT INTO temp_data (batch_id, data) VALUES (?, ?)\",\n",
- " (batch_id, file_path))\n",
- " self.conn.commit()\n",
- "\n",
- " async def get_data(self, batch_id: str) -> pd.DataFrame:\n",
- " cursor = self.conn.execute(\"SELECT data FROM temp_data WHERE batch_id = ?\", (batch_id,))\n",
- " result = cursor.fetchone()\n",
- " if result:\n",
- " data = result[0]\n",
- " if data.startswith('{'): # JSON data\n",
- " return pd.read_json(data)\n",
- " else: # File path\n",
- " return pd.read_parquet(data)\n",
- " return None\n",
- "\n",
- " def cleanup(self):\n",
- " self.conn.close()\n",
- " for file in os.listdir(self.temp_dir):\n",
- " os.remove(os.path.join(self.temp_dir, file))\n",
- " os.rmdir(self.temp_dir)"
- ]
- },
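- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A small round trip through the handler above, using a made-up two-row frame (illustrative only). Frames at or below `max_memory_rows` are stored as JSON in SQLite; larger ones are spilled to Parquet files in the temp directory."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
-    "# Hypothetical save/load round trip; the frame below is illustrative only\n",
-    "handler = DataHandler(max_memory_rows=10000)\n",
-    "demo_df = pd.DataFrame({\"Timestamp\": [\"2024-06-22T12:00:00Z\", \"2024-06-22T12:01:00Z\"], \"Value\": [1.0, 2.0]})\n",
-    "await handler.save_data(\"demo_batch\", demo_df)   # 2 rows <= max_memory_rows, so it lands in SQLite as JSON\n",
-    "restored = await handler.get_data(\"demo_batch\")\n",
-    "print(restored)\n",
-    "handler.cleanup()"
- ]
- },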
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def get_historical_aggregated_values_batch_time_vars_data_async(\n",
- " self,\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " agg_name: str,\n",
- " variable_list: List[str],\n",
- " max_data_points: int = 1000,\n",
- " max_retries: int = 3,\n",
- " retry_delay: int = 5,\n",
- " max_concurrent_requests: int = 10\n",
- ") -> pd.DataFrame:\n",
- " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- " logger = logging.getLogger(__name__)\n",
- "\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
- " data_handler = DataHandler()\n",
- "\n",
- " async def process_batch(vid, variables, time_batch):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " content = await request_from_api_async(\n",
- " client_pool,\n",
- " method=\"POST\",\n",
- " endpoint=f\"/values/historicalaggregated\",\n",
- " data=json.dumps(body, default=self.json_serial),\n",
- " extended_timeout=True\n",
- " )\n",
- " break\n",
- " except (aiohttp.ClientError, ValidationError) as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " self._check_content(content)\n",
- "\n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
- " ['HistoryReadResults', 'NodeId','Id'],\n",
- " ['HistoryReadResults', 'NodeId','Namespace']]\n",
- " )\n",
- " batch_id = f\"{time_batch}_{vid}\"\n",
- " await data_handler.save_data(batch_id, df_result)\n",
- " return batch_id\n",
- "\n",
- " tasks = [\n",
- " process_batch(vid,variables, time_batch)\n",
- " for vid,variables in enumerate(variable_batches)\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " try:\n",
- " batch_ids = await asyncio.gather(*tasks)\n",
- " # for batch_id in batch_ids:\n",
- " # df = await data_handler.get_data(batch_id)\n",
- " # all_results.append(df)\n",
- "\n",
- " # logger.info(\"Combining all batches...\")\n",
- " # combined_df = pd.concat(all_results, ignore_index=True)\n",
- " # columns = {\n",
- " # \"Value.Type\": \"ValueType\",\n",
- " # \"Value.Body\": \"Value\",\n",
- " # \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " # \"StatusCode.Code\": \"StatusCode\",\n",
- " # \"SourceTimestamp\": \"Timestamp\",\n",
- " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " # }\n",
- " # return self._process_df(combined_df, columns)\n",
- " finally:\n",
- " await client_pool.close_all()\n",
- " data_handler.cleanup()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async(\n",
- " opc_data,\n",
- " start_time=start_time,\n",
- " end_time=end_time,\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=20000,\n",
- " max_concurrent_requests=50\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Async with parquet data handler for large data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [],
- "source": [
- "import asyncio\n",
- "import aiohttp\n",
- "import pandas as pd\n",
- "import pyarrow as pa\n",
- "import pyarrow.parquet as pq\n",
- "from datetime import datetime, timedelta\n",
- "import json\n",
- "from typing import List, Dict, Any\n",
- "import logging\n",
- "from asyncio import Semaphore\n",
- "from aiohttp import TCPConnector\n",
- "from tenacity import retry, stop_after_attempt, wait_exponential\n",
- "from concurrent.futures import ThreadPoolExecutor\n",
- "\n",
- "import tracemalloc\n",
- "tracemalloc.start()\n",
- "\n",
- "logger = logging.getLogger(__name__)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [],
- "source": [
- "class AsyncParquetWriter:\n",
- " def __init__(self, filename):\n",
- " self.filename = filename\n",
- " self.writer = None\n",
- " self.executor = ThreadPoolExecutor(max_workers=10)\n",
- "\n",
- " async def write(self, df):\n",
- " loop = asyncio.get_running_loop()\n",
- " table = pa.Table.from_pandas(df)\n",
- " if self.writer is None:\n",
- " self.writer = pq.ParquetWriter(self.filename, table.schema)\n",
- " await loop.run_in_executor(self.executor, self.writer.write_table, table)\n",
- "\n",
- " async def close(self):\n",
- " if self.writer:\n",
- " loop = asyncio.get_running_loop()\n",
- " await loop.run_in_executor(self.executor, self.writer.close)\n",
- " self.writer = None\n",
- "\n",
- "class DataHandler:\n",
- " def __init__(self, base_path):\n",
- " self.base_path = base_path\n",
- " self.writers = {}\n",
- "\n",
- " async def save_data(self, batch_id: str, data: pd.DataFrame):\n",
- " if batch_id not in self.writers:\n",
- " self.writers[batch_id] = AsyncParquetWriter(f\"{self.base_path}/batch_{batch_id}.parquet\")\n",
- " await self.writers[batch_id].write(data)\n",
- "\n",
- " async def close_all(self):\n",
- " for writer in self.writers.values():\n",
- " await writer.close()"
- ]
- },
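- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A minimal sketch of the Parquet-backed handler above. It appends one row group per `save_data` call under a local directory; `pqfiles` is created here to match the `base_path` used in the function below, and the frame is illustrative."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
-    "# Hypothetical usage; the directory and frame are illustrative only\n",
-    "import os\n",
-    "os.makedirs(\"pqfiles\", exist_ok=True)\n",
-    "\n",
-    "pq_handler = DataHandler(base_path=\"pqfiles\")\n",
-    "demo_df = pd.DataFrame({\"Value\": [1.0, 2.0, 3.0]})\n",
-    "await pq_handler.save_data(\"demo\", demo_df)   # writes a row group to pqfiles/batch_demo.parquet\n",
-    "await pq_handler.close_all()\n",
-    "print(pd.read_parquet(\"pqfiles/batch_demo.parquet\"))"
- ]
- },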
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n",
- " self,\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " agg_name: str,\n",
- " variable_list: List[str],\n",
- " max_data_points: int = 100000,\n",
- " max_retries: int = 3,\n",
- " retry_delay: int = 5,\n",
- " max_concurrent_requests: int = 50\n",
- ") -> pd.DataFrame:\n",
- " logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
- " logger = logging.getLogger(__name__)\n",
- "\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in variable_list]\n",
- " total_time_range_ms = (end_time - start_time).total_seconds() * 1000\n",
- " estimated_intervals = total_time_range_ms / pro_interval\n",
- " max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))\n",
- " variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]\n",
- " max_time_batches = max(1, int(estimated_intervals / max_data_points))\n",
- " time_batch_size_ms = total_time_range_ms / max_time_batches\n",
- "\n",
- " all_results = []\n",
- " semaphore = Semaphore(max_concurrent_requests)\n",
- " client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)\n",
- " data_handler = DataHandler(base_path=\"pqfiles\")\n",
- "\n",
- " async def process_batch(vid, variables, time_batch):\n",
- " async with semaphore:\n",
- " batch_start_ms = time_batch * time_batch_size_ms\n",
- " batch_end_ms = min((time_batch + 1) * time_batch_size_ms, total_time_range_ms)\n",
- " batch_start = start_time + timedelta(milliseconds=batch_start_ms)\n",
- " batch_end = start_time + timedelta(milliseconds=batch_end_ms)\n",
- "\n",
- " body = {\n",
- " **self.body,\n",
- " \"StartTime\": batch_start.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"EndTime\": batch_end.strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"ReadValueIds\": variables,\n",
- " \"AggregateName\": agg_name\n",
- " }\n",
- "\n",
- " for attempt in range(max_retries):\n",
- " try:\n",
- " content = await request_from_api_async(\n",
- " client_pool,\n",
- " method=\"POST\",\n",
- " endpoint=f\"/values/historicalaggregated\",\n",
- " data=json.dumps(body, default=self.json_serial),\n",
- " extended_timeout=True\n",
- " )\n",
- " break\n",
- " except (aiohttp.ClientError, ValidationError) as e:\n",
- " if attempt < max_retries - 1:\n",
- " wait_time = retry_delay * (2 ** attempt)\n",
- " logger.warning(f\"Request failed. Retrying in {wait_time} seconds...\")\n",
- " await asyncio.sleep(wait_time)\n",
- " else:\n",
- " logger.error(f\"Max retries reached. Error: {e}\")\n",
- " raise RuntimeError(f'Error message {e}')\n",
- "\n",
- " self._check_content(content)\n",
- "\n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], \n",
- " ['HistoryReadResults', 'NodeId','Id'],\n",
- " ['HistoryReadResults', 'NodeId','Namespace']]\n",
- " )\n",
- " batch_id = f\"{time_batch}_{vid}\"\n",
- " await data_handler.save_data(batch_id, df_result)\n",
- " return batch_id\n",
- "\n",
- " tasks = [\n",
- " process_batch(vid,variables, time_batch)\n",
- " for vid,variables in enumerate(variable_batches)\n",
- " for time_batch in range(max_time_batches)\n",
- " ]\n",
- "\n",
- " try:\n",
- " batch_ids = await asyncio.gather(*tasks)\n",
- " # for batch_id in batch_ids:\n",
- " # df = await data_handler.get_data(batch_id)\n",
- " # all_results.append(df)\n",
- "\n",
- " # logger.info(\"Combining all batches...\")\n",
- " # combined_df = pd.concat(all_results, ignore_index=True)\n",
- " # columns = {\n",
- " # \"Value.Type\": \"ValueType\",\n",
- " # \"Value.Body\": \"Value\",\n",
- " # \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " # \"StatusCode.Code\": \"StatusCode\",\n",
- " # \"SourceTimestamp\": \"Timestamp\",\n",
- " # \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " # \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " # \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " # }\n",
- " # return self._process_df(combined_df, columns)\n",
- " finally:\n",
- " await client_pool.close_all()\n",
- " await data_handler.close_all()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-07-18 12:29:48,821 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:48,825 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:48,830 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:48,836 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,941 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,950 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,952 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,955 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,958 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,965 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,968 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,970 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,973 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,975 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,976 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,982 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,986 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,989 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,991 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,994 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,997 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:54,999 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,003 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,008 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,010 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,015 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,018 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,022 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,025 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,027 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:29:55,030 - WARNING - Request failed. Retrying in 5 seconds...\n"
- ]
- },
- {
- "ename": "RuntimeError",
- "evalue": "BadSecureChannelClosed",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[1;32mIn[35], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# 1 day aggregated historical data\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m one_day_historical_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n\u001b[0;32m 3\u001b[0m opc_data,\n\u001b[0;32m 4\u001b[0m start_time\u001b[38;5;241m=\u001b[39mdatetime(\u001b[38;5;241m2024\u001b[39m,\u001b[38;5;241m6\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m00\u001b[39m,\u001b[38;5;241m00\u001b[39m),\n\u001b[0;32m 5\u001b[0m end_time\u001b[38;5;241m=\u001b[39mdatetime(\u001b[38;5;241m2024\u001b[39m,\u001b[38;5;241m6\u001b[39m,\u001b[38;5;241m2\u001b[39m,\u001b[38;5;241m00\u001b[39m,\u001b[38;5;241m00\u001b[39m),\n\u001b[0;32m 6\u001b[0m pro_interval\u001b[38;5;241m=\u001b[39mpro_interval,\n\u001b[0;32m 7\u001b[0m agg_name\u001b[38;5;241m=\u001b[39magg_name,\n\u001b[0;32m 8\u001b[0m variable_list\u001b[38;5;241m=\u001b[39mvariable_list,\n\u001b[0;32m 9\u001b[0m max_data_points\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50000\u001b[39m,\n\u001b[0;32m 10\u001b[0m max_concurrent_requests\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m one_day_historical_data\n",
- "Cell \u001b[1;32mIn[33], line 84\u001b[0m, in \u001b[0;36mget_historical_aggregated_values_batch_time_vars_data_async_parquet\u001b[1;34m(self, start_time, end_time, pro_interval, agg_name, variable_list, max_data_points, max_retries, retry_delay, max_concurrent_requests)\u001b[0m\n\u001b[0;32m 77\u001b[0m tasks \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 78\u001b[0m process_batch(vid,variables, time_batch)\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m vid,variables \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(variable_batches)\n\u001b[0;32m 80\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m time_batch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_time_batches)\n\u001b[0;32m 81\u001b[0m ]\n\u001b[0;32m 83\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 84\u001b[0m batch_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mtasks)\n\u001b[0;32m 85\u001b[0m \u001b[38;5;66;03m# for batch_id in batch_ids:\u001b[39;00m\n\u001b[0;32m 86\u001b[0m \u001b[38;5;66;03m# df = await data_handler.get_data(batch_id)\u001b[39;00m\n\u001b[0;32m 87\u001b[0m \u001b[38;5;66;03m# all_results.append(df)\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[38;5;66;03m# return self._process_df(combined_df, columns)\u001b[39;00m\n\u001b[0;32m 102\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 103\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m client_pool\u001b[38;5;241m.\u001b[39mclose_all()\n",
- "Cell \u001b[1;32mIn[33], line 64\u001b[0m, in \u001b[0;36mget_historical_aggregated_values_batch_time_vars_data_async_parquet..process_batch\u001b[1;34m(vid, variables, time_batch)\u001b[0m\n\u001b[0;32m 61\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMax retries reached. Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 62\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError message \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_content\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 66\u001b[0m df_result \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mjson_normalize(\n\u001b[0;32m 67\u001b[0m content, \n\u001b[0;32m 68\u001b[0m record_path\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDataValues\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 71\u001b[0m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNodeId\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNamespace\u001b[39m\u001b[38;5;124m'\u001b[39m]]\n\u001b[0;32m 72\u001b[0m )\n\u001b[0;32m 73\u001b[0m batch_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime_batch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n",
- "File \u001b[1;32m~\\OneDrive - TGS Prediktor AS\\Dokumenter\\git_repos\\pyPrediktorMapClient\\src\\pyprediktormapclient\\opc_ua.py:319\u001b[0m, in \u001b[0;36mOPC_UA._check_content\u001b[1;34m(self, content)\u001b[0m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo content returned from the server\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 319\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mErrorMessage\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m 320\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHistoryReadResults\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m content:\n\u001b[0;32m 321\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(content\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mErrorMessage\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
- "\u001b[1;31mRuntimeError\u001b[0m: BadSecureChannelClosed"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-07-18 12:31:33,268 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,271 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,274 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,276 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,278 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,282 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,286 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,292 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,297 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:31:33,301 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:32:37,429 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:42,844 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:42,847 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:42,849 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:45,402 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,404 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,406 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,409 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,412 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,414 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,415 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,417 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,420 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,423 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,424 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,425 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,427 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,429 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,430 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,431 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,433 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,435 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,437 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,438 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,439 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,441 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,443 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,446 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,448 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,449 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,451 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,452 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,453 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,455 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,456 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,458 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,459 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,461 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,462 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,463 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,464 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,466 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,468 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,470 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,471 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,473 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,475 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,476 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,477 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,478 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,480 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,482 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,483 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,485 - ERROR - Unclosed client session\n",
- "client_session: \n",
- "2024-07-18 12:32:45,513 - ERROR - Unclosed connector\n",
- "connections: ['[(, 103057.281)]']\n",
- "connector: \n",
- "2024-07-18 12:32:50,890 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,892 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,894 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,896 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,897 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,901 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,904 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,909 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:32:50,911 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:02,993 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:02,995 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:02,996 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,000 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,002 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,005 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,006 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,010 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,012 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,018 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,020 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,022 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,025 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,026 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:03,029 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:09,145 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:09,147 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:09,149 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:42,948 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:33:50,613 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:09,237 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:15,552 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,556 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,562 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,566 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,569 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,589 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,603 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:15,611 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:34:21,430 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:21,434 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:21,482 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:21,510 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:21,545 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:21,585 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:21,630 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:21,965 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:21,989 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:22,008 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,026 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:34:22,039 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,046 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,052 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,058 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,060 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,068 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,075 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,083 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,089 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,090 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,097 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,105 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,106 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,108 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,117 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,119 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,123 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,124 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,126 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,128 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,130 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,136 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,138 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,140 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,142 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:22,146 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:43,133 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:34:50,798 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:09,424 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:15,741 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:15,858 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:21,702 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:21,778 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:21,817 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,367 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,414 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,441 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,446 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,447 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,470 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,476 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,483 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,495 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,508 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,511 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,522 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,574 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:22,578 - WARNING - Request failed. Retrying in 5 seconds...\n",
- "2024-07-18 12:35:25,808 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,813 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,815 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,825 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,830 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,832 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,836 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:25,845 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:35:26,682 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:26,699 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:26,783 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:26,930 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:27,164 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:27,202 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:27,246 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:35:56,099 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:25,968 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:26,988 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,122 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,150 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,661 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,666 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,681 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,719 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,746 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,796 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,830 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,838 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,842 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,866 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,874 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,881 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,884 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:27,958 - WARNING - Request failed. Retrying in 10 seconds...\n",
- "2024-07-18 12:36:36,878 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:36,944 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:36,977 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:37,175 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:37,322 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:37,373 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:36:37,483 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:06,473 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:37,230 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:37,350 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:37,383 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,132 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,168 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,171 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,173 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,175 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,181 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,184 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,202 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,205 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,207 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,210 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,212 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,216 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n",
- "2024-07-18 12:37:38,227 - ERROR - Max retries reached. Error: 504, message='Gateway Time-out', url=URL('https://apis-opcua-api.powerview.scatec.app/values/historicalaggregated')\n"
- ]
- }
- ],
- "source": [
- "# 1 day aggregated historical data\n",
- "one_day_historical_data = await get_historical_aggregated_values_batch_time_vars_data_async_parquet(\n",
- " opc_data,\n",
- " start_time=datetime(2024,6,1,00,00),\n",
- " end_time=datetime(2024,6,2,00,00),\n",
- " pro_interval=pro_interval,\n",
- " agg_name=agg_name,\n",
- " variable_list=variable_list,\n",
- " max_data_points=50000,\n",
- " max_concurrent_requests=50\n",
- ")\n",
- "one_day_historical_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Stringset data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_historical_aggregated_values(opc_data,\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " agg_name, \n",
- " variable_list\n",
- ") -> pd.DataFrame:\n",
- " \n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]\n",
- "\n",
- " body = {\n",
- " **opc_data.body, \n",
- " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
- " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
- " \"ProcessingInterval\": pro_interval, \n",
- " \"AggregateName\": agg_name,\n",
- " \"ReadValueIds\": extended_variables\n",
- " }\n",
- " print(body)\n",
- "\n",
- " content = request_from_api(\n",
- " rest_url=opcua_rest_url, \n",
- " method=\"POST\", \n",
- " endpoint=\"values/historicalaggregated\", \n",
- " data=json.dumps(body, default=opc_data.json_serial), \n",
- " headers=opc_data.headers, \n",
- " extended_timeout=True\n",
- " )\n",
- " print(content)\n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']\n",
- " ]\n",
- " )\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " return opc_data._process_df(df_result, columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "start_time=(datetime.datetime.now() - datetime.timedelta(30))\n",
- "end_time=(datetime.datetime.now() - datetime.timedelta(29))\n",
- "pro_interval=600000\n",
- "agg_name=\"Average\"\n",
- "variable_list=string_sets.variables_as_list([\"DCPower\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_historical_aggregated_values(opc_data,\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " agg_name, \n",
- " variable_list) -> pd.DataFrame:\n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- " batch_size = 100\n",
- " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n",
- " \n",
- " combined_df = pd.DataFrame() \n",
- " \n",
- " for batch in batches:\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n",
- " \n",
- " body = {\n",
- " **opc_data.body, \n",
- " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
- " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"), \n",
- " \"ProcessingInterval\": pro_interval, \n",
- " \"AggregateName\": agg_name,\n",
- " \"ReadValueIds\": extended_variables\n",
- " }\n",
- " \n",
- " content = request_from_api(\n",
- " rest_url=opcua_rest_url, \n",
- " method=\"POST\", \n",
- " endpoint=\"values/historicalaggregated\", \n",
- " data=json.dumps(body, default=opc_data.json_serial), \n",
- " headers=opc_data.headers, \n",
- " extended_timeout=True\n",
- " )\n",
- " \n",
- " df_result = pd.json_normalize(\n",
- " content, \n",
- " record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],['HistoryReadResults', 'NodeId','Namespace']]\n",
- " )\n",
- " \n",
- " if combined_df.empty:\n",
- " combined_df = df_result\n",
- " else:\n",
- " combined_df = pd.concat([combined_df, df_result], ignore_index=True)\n",
- " \n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- " \n",
- " return opc_data._process_df(combined_df, columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "get_historical_aggregated_values(opc_data,\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " agg_name, \n",
- " variable_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import hashlib\n",
- "import concurrent.futures"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_historical_aggregated_values(opc_data, start_time, end_time, pro_interval, agg_name, variable_list) -> pd.DataFrame:\n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- " batch_size = 150\n",
- " batches = [vars[i:i + batch_size] for i in range(0, len(vars), batch_size)]\n",
- "\n",
- " def process_batch(batch):\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in batch]\n",
- " body = {\n",
- " **opc_data.body,\n",
- " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
- " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"AggregateName\": agg_name,\n",
- " \"ReadValueIds\": extended_variables\n",
- " }\n",
- " content = request_from_api(\n",
- " rest_url=opcua_rest_url,\n",
- " method=\"POST\",\n",
- " endpoint=\"values/historicalaggregated\",\n",
- " data=json.dumps(body, default=opc_data.json_serial),\n",
- " headers=opc_data.headers,\n",
- " extended_timeout=True\n",
- " )\n",
- " return pd.json_normalize(\n",
- " content,\n",
- " record_path=['HistoryReadResults', 'DataValues'],\n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId', 'Id'], ['HistoryReadResults', 'NodeId', 'Namespace']]\n",
- " )\n",
- "\n",
- " dataframes = []\n",
- " with concurrent.futures.ThreadPoolExecutor() as executor:\n",
- " future_to_batch = {executor.submit(process_batch, batch): batch for batch in batches}\n",
- " for future in concurrent.futures.as_completed(future_to_batch):\n",
- " dataframes.append(future.result())\n",
- "\n",
- " combined_df = pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()\n",
- "\n",
- " columns = {\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"IdType\",\n",
- " \"HistoryReadResults.NodeId.Id\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " }\n",
- "\n",
- " return opc_data._process_df(combined_df, columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "vars = opc_data._get_variable_list_as_list(variable_list)\n",
- "extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "body = {\n",
- " **opc_data.body,\n",
- " \"StartTime\": start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
- " \"EndTime\": end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\"),\n",
- " \"ProcessingInterval\": pro_interval,\n",
- " \"AggregateName\": agg_name,\n",
- " \"ReadValueIds\": extended_variables\n",
- "}\n",
- "body"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "get_historical_aggregated_values(opc_data,\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " agg_name, \n",
- " variable_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "start_time = datetime.now() - relativedelta(months=1)\n",
- "end_time = datetime.now()\n",
- "get_historical_aggregated_values(opc_data,\n",
- " start_time, \n",
- " end_time, \n",
- " pro_interval, \n",
- " agg_name, \n",
- " variable_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# History data for 1 day, 10 min aggregate - stringsets\n",
- "history_agg = opc_data.get_historical_aggregated_values(\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
- " pro_interval=600000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=inverters.variables_as_list([\"DCPower\"]),\n",
- ")\n",
- "history_agg"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import copy\n",
- "import math\n",
- "from pydantic import BaseModel, AnyUrl\n",
- "from datetime import timedelta\n",
- "import asyncio\n",
- "import aiohttp"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "class Variables(BaseModel):\n",
- " \"\"\"Helper class to parse all values api's.\n",
- " Variables are described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.1/\n",
- "\n",
- " Variables:\n",
- " Id: str - Id of the signal, e.g. SSO.EG-AS.WeatherSymbol\n",
- " Namespace: int - Namespace on the signal, e.g. 2.\n",
- " IdType: int - IdTypes described in https://reference.opcfoundation.org/v104/Core/docs/Part3/8.2.3/.\n",
- " \"\"\"\n",
- " Id: str\n",
- " Namespace: int\n",
- " IdType: int"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n",
- " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
- "\n",
- " # Creating a new variable list to remove pydantic models\n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- "\n",
- " extended_variables = [\n",
- " {\n",
- " \"NodeId\": var,\n",
- " \"AggregateName\": agg_name,\n",
- " }\n",
- " for var in vars\n",
- " ]\n",
- " body = copy.deepcopy(opc_data.body)\n",
- " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"ProcessingInterval\"] = pro_interval\n",
- " body[\"ReadValueIds\"] = extended_variables\n",
- " body[\"AggregateName\"] = agg_name\n",
- "\n",
- " # Make API request using aiohttp session\n",
- " async with aiohttp.ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{opcua_rest_url}values/historicalaggregated\",\n",
- " data=json.dumps(body, default=opc_data.json_serial),\n",
- " headers=opc_data.headers,\n",
- " timeout=aiohttp.ClientTimeout(total=None) \n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- "\n",
- " return content"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "vars = opc_data._get_variable_list_as_list(variable_list)\n",
- "vars1 = vars[0:5]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "extended_variables = [\n",
- " {\n",
- " \"NodeId\": var,\n",
- " \"AggregateName\": agg_name,\n",
- " }\n",
- " for var in vars1\n",
- "]\n",
- "len(extended_variables)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "body = copy.deepcopy(opc_data.body)\n",
- "body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- "body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- "body[\"ProcessingInterval\"] = pro_interval\n",
- "body[\"ReadValueIds\"] = extended_variables\n",
- "body[\"AggregateName\"] = agg_name\n",
- "body"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "f\"{opcua_rest_url}values/historicalaggregated\","
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data=json.dumps(body, default=opc_data.json_serial)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data_dict = json.loads(data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "read_value_ids = data_dict['ReadValueIds']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "len(read_value_ids)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "headers=opc_data.headers\n",
- "headers"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "timeout=aiohttp.ClientTimeout(total=None) \n",
- "timeout"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async with aiohttp.ClientSession() as session:\n",
- " async with session.post(\n",
- " f\"{opcua_rest_url}values/historicalaggregated\",\n",
- " data=json.dumps(body, default=opc_data.json_serial),\n",
- " headers=opc_data.headers,\n",
- " timeout=aiohttp.ClientTimeout(total=None) \n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "content"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_time_batches(start_time: datetime, end_time: datetime, pro_interval: int, batch_size: int) -> list[tuple]:\n",
- " \"\"\"Generate time batches based on start time, end time, processing interval, and batch size\"\"\"\n",
- "\n",
- " total_time_range = end_time - start_time\n",
- " pro_interval_seconds = (pro_interval / 1000)\n",
- " total_data_points = (total_time_range.total_seconds() // pro_interval_seconds) + 1\n",
- "\n",
- " total_batches = math.ceil(total_data_points / batch_size)\n",
- " actual_batch_size = math.ceil(total_data_points / total_batches)\n",
- "\n",
- " time_batches = [\n",
- " (start_time + timedelta(seconds=(i * actual_batch_size * pro_interval_seconds)),\n",
- " start_time + timedelta(seconds=((i + 1) * actual_batch_size * pro_interval_seconds)) - timedelta(seconds=pro_interval_seconds))\n",
- " for i in range(total_batches)\n",
- " ]\n",
- "\n",
- " return time_batches"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_variable_batches(variable_list: list[Variables], batch_size: int) -> list[list[Variables]]:\n",
- " \"\"\"Generate variable batches based on the variable list and batch size\"\"\"\n",
- "\n",
- " variable_batches = [\n",
- " variable_list[i:i + batch_size] for i in range(0, len(variable_list), batch_size)\n",
- " ]\n",
- "\n",
- " return variable_batches"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def process_api_response(opc_data, response: dict) -> pd.DataFrame:\n",
- " \"\"\"Process the API response and return the result dataframe\"\"\"\n",
- " \n",
- " df_result = pd.json_normalize(response, record_path=['HistoryReadResults', 'DataValues'], \n",
- " meta=[['HistoryReadResults', 'NodeId', 'IdType'], ['HistoryReadResults', 'NodeId','Id'],\n",
- " ['HistoryReadResults', 'NodeId','Namespace']] )\n",
- "\n",
- " for i, row in df_result.iterrows():\n",
- " if not math.isnan(row[\"Value.Type\"]):\n",
- " value_type = opc_data._get_value_type(int(row[\"Value.Type\"])).get(\"type\")\n",
- " df_result.at[i, \"Value.Type\"] = str(value_type)\n",
- "\n",
- " df_result.rename(\n",
- " columns={\n",
- " \"Value.Type\": \"ValueType\",\n",
- " \"Value.Body\": \"Value\",\n",
- " \"StatusCode.Symbol\": \"StatusSymbol\",\n",
- " \"StatusCode.Code\": \"StatusCode\",\n",
- " \"SourceTimestamp\": \"Timestamp\",\n",
- " \"HistoryReadResults.NodeId.IdType\": \"Id\",\n",
- " \"HistoryReadResults.NodeId.Namespace\": \"Namespace\",\n",
- " },\n",
- " errors=\"raise\",\n",
- " inplace=True,\n",
- " )\n",
- "\n",
- " return df_result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def get_historical_aggregated_values_async(\n",
- " opc_data,\n",
- " start_time: datetime,\n",
- " end_time: datetime,\n",
- " pro_interval: int,\n",
- " agg_name: str,\n",
- " variable_list: list[Variables],\n",
- " batch_size: int = 1000\n",
- ") -> pd.DataFrame:\n",
- " \"\"\"Request historical aggregated values from the OPC UA server with batching\"\"\"\n",
- "\n",
- " \n",
- " time_batches = generate_time_batches(start_time, end_time, pro_interval, batch_size)\n",
- " variable_batches = generate_variable_batches(variable_list, batch_size)\n",
- "\n",
- " # Creating tasks for each API request and gathering the results\n",
- " tasks = []\n",
- "\n",
- " for time_batch_start, time_batch_end in time_batches:\n",
- " for variable_sublist in variable_batches:\n",
- " task = asyncio.create_task(\n",
- " make_async_api_request(opc_data, time_batch_start, time_batch_end, pro_interval, agg_name, variable_sublist)\n",
- " ) \n",
- " tasks.append(task)\n",
- " \n",
- " # Execute all tasks concurrently and gather their results\n",
- " responses = await asyncio.gather(*tasks)\n",
- " \n",
- " # Processing the API responses\n",
- " result_list = []\n",
- " for idx, batch_response in enumerate(responses):\n",
- " \n",
- " batch_result = process_api_response(opc_data, batch_response)\n",
- " result_list.append(batch_result)\n",
- " \n",
- " result_df = pd.concat(result_list, ignore_index=True)\n",
- "\n",
- " return result_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical inverter data in asyncio process\n",
- "one_days_historic_inverter_data2 = await get_historical_aggregated_values_async(\n",
- " opc_data,\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
- " pro_interval=60*1000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=string_sets.variables_as_list([\"DCPower\"]),\n",
- " batch_size=100\n",
- ")\n",
- "one_days_historic_inverter_data2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def generate_time_chunks(start_time: datetime, end_time: datetime):\n",
- " \"\"\"Generate time chunks between start_time and end_time, each chunk_duration_minutes long.\"\"\"\n",
- " delta = timedelta(minutes=60)\n",
- " current_time = start_time\n",
- " while current_time < end_time:\n",
- " chunk_end_time = min(current_time + delta, end_time)\n",
- " yield (current_time, chunk_end_time)\n",
- " current_time = chunk_end_time"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables], max_data_points=500) -> dict:\n",
- " \"\"\"Make API request for the given time range and variable list, with additional chunking based on data points.\"\"\"\n",
- "\n",
- " def chunk_list(lst, n):\n",
- " \"\"\"Yield successive n-sized chunks from lst.\"\"\"\n",
- " for i in range(0, len(lst), n):\n",
- " yield lst[i:i + n]\n",
- "\n",
- " async def fetch_data_for_time_period(session, vars_chunk, start, end):\n",
- " \"\"\"Fetch data for a given time period and chunk of variables.\"\"\"\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n",
- " body = copy.deepcopy(opc_data.body)\n",
- " body[\"StartTime\"] = start.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"EndTime\"] = end.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"ProcessingInterval\"] = pro_interval\n",
- " body[\"ReadValueIds\"] = extended_variables\n",
- " body[\"AggregateName\"] = agg_name\n",
- "\n",
- " async with session.post(\n",
- " f\"{opcua_rest_url}values/historicalaggregated\",\n",
- " data=json.dumps(body, default=str),\n",
- " headers=opc_data.headers,\n",
- " timeout=aiohttp.ClientTimeout(total=None)\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " return await response.json()\n",
- "\n",
- " # Creating a new variable list to remove pydantic models\n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- " chunk_size = 5 # Chunk size for node IDs\n",
- " vars_chunks = list(chunk_list(vars, chunk_size))\n",
- "\n",
- " all_responses = []\n",
- " async with aiohttp.ClientSession() as session:\n",
- " for vars_chunk in vars_chunks:\n",
- " # Generate time chunks for the given time period\n",
- " async for start, end in generate_time_chunks(start_time, end_time):\n",
- " content = await fetch_data_for_time_period(session, vars_chunk, start, end)\n",
- " all_responses.append(content)\n",
- " return all_responses"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "async def make_async_api_request(opc_data, start_time: datetime, end_time: datetime, pro_interval: int, agg_name: str, variable_list: list[Variables]) -> dict:\n",
- " \"\"\"Make API request for the given time range and variable list\"\"\"\n",
- "\n",
- " def chunk_list(lst, n):\n",
- " for i in range(0, len(lst), n):\n",
- " yield lst[i:i + n]\n",
- "\n",
- " # Creating a new variable list to remove pydantic models\n",
- " vars = opc_data._get_variable_list_as_list(variable_list)\n",
- "\n",
- " chunk_size = 150 \n",
- " vars_chunks = list(chunk_list(vars, chunk_size))\n",
- "\n",
- " all_responses = []\n",
- " async with aiohttp.ClientSession() as session:\n",
- " for vars_chunk in vars_chunks:\n",
- " extended_variables = [{\"NodeId\": var, \"AggregateName\": agg_name} for var in vars_chunk]\n",
- " body = copy.deepcopy(opc_data.body)\n",
- " body[\"StartTime\"] = start_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"EndTime\"] = end_time.strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
- " body[\"ProcessingInterval\"] = pro_interval\n",
- " body[\"ReadValueIds\"] = extended_variables\n",
- " body[\"AggregateName\"] = agg_name\n",
- "\n",
- " async with session.post(\n",
- " f\"{opcua_rest_url}values/historicalaggregated\",\n",
- " data=json.dumps(body, default=str),\n",
- " headers=opc_data.headers,\n",
- " timeout=aiohttp.ClientTimeout(total=None)\n",
- " ) as response:\n",
- " response.raise_for_status()\n",
- " content = await response.json()\n",
- " all_responses.append(content) \n",
- "\n",
- " return all_responses"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from datetime import datetime, timedelta\n",
- "from typing import List, Tuple"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_time_chunks(start_time: datetime, end_time: datetime, interval_hours: int) -> List[Tuple[datetime, datetime]]:\n",
- " \"\"\"Generate time chunks within the given start and end time with specified interval in hours.\"\"\"\n",
- " delta = timedelta(hours=interval_hours)\n",
- " current_time = start_time\n",
- " chunks = []\n",
- "\n",
- " while current_time < end_time:\n",
- " chunk_end_time = min(current_time + delta, end_time) \n",
- " chunks.append((current_time, chunk_end_time))\n",
- " current_time += delta\n",
- "\n",
- " return chunks"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 1 day aggregated historical inverter data in asyncio process\n",
- "one_days_historic_inverter_data2 = await make_async_api_request(\n",
- " opc_data,\n",
- " start_time=(datetime.datetime.now() - datetime.timedelta(30)),\n",
- " end_time=(datetime.datetime.now() - datetime.timedelta(29)),\n",
- " pro_interval=60*1000,\n",
- " agg_name=\"Average\",\n",
- " variable_list=string_sets.variables_as_list([\"DCPower\"])\n",
- ")\n",
- "one_days_historic_inverter_data2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.12.1 64-bit",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- },
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "6b866f0bc560289bf4bb2415ae9074243764eb008c10d00a1da29433677418de"
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
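
The deleted notebook above iterates toward the same pattern the library code below adopts: split the node list into batches, cap concurrency with a semaphore, and retry failed requests with exponential backoff (the 504 Gateway Time-out output shows why the retries matter). Below is a minimal, self-contained sketch of that pattern, not code from this diff; `post_batch`, `fetch_all`, and the simulated failure are illustrative stand-ins, not library API.

```python
import asyncio
import random

async def post_batch(batch: list, max_retries: int = 3, retry_delay: int = 5) -> list:
    """Send one batch, retrying with exponential backoff on failure (simulated here)."""
    for attempt in range(max_retries):
        try:
            if random.random() < 0.3:                      # stand-in for a 504 / client error
                raise ConnectionError("simulated gateway time-out")
            return [f"value for {node}" for node in batch]
        except ConnectionError:
            if attempt < max_retries - 1:
                wait_time = retry_delay * (2 ** attempt)   # 5 s, 10 s, ... as in the log output
                await asyncio.sleep(wait_time)
            else:
                raise

async def fetch_all(nodes: list, batch_size: int = 150, max_concurrent: int = 10) -> list:
    """Batch the node list and fetch the batches with bounded concurrency."""
    semaphore = asyncio.Semaphore(max_concurrent)
    batches = [nodes[i:i + batch_size] for i in range(0, len(nodes), batch_size)]

    async def bounded(batch: list) -> list:
        async with semaphore:
            return await post_batch(batch)

    results = await asyncio.gather(*(bounded(b) for b in batches))
    return [row for batch_result in results for row in batch_result]

# Example: asyncio.run(fetch_all([f"node-{i}" for i in range(400)]))
```

In the real methods the simulated call is an aiohttp POST to values/historicalaggregated (or values/historical), and each gathered result is a DataFrame that is concatenated at the end rather than a list.
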
diff --git a/src/pyprediktormapclient/opc_ua.py b/src/pyprediktormapclient/opc_ua.py
index fb1cbb9..3c0e8b0 100644
--- a/src/pyprediktormapclient/opc_ua.py
+++ b/src/pyprediktormapclient/opc_ua.py
@@ -1,20 +1,18 @@
import json
-import math
import logging
-import datetime
import copy
import pandas as pd
+import requests
from datetime import date, datetime, timedelta
from typing import Dict, List, Any, Union, Optional
-from pydantic import BaseModel, AnyUrl, validate_call, ValidationError
+from pydantic import BaseModel, AnyUrl
from pydantic_core import Url
-from pyprediktormapclient.shared import request_from_api, request_from_api_async, ClientPool
+from pyprediktormapclient.shared import request_from_api
from requests import HTTPError
-from aiohttp import ClientSession
-from asyncio import Semaphore
import asyncio
-import requests
import aiohttp
+from aiohttp import ClientSession
+from asyncio import Semaphore
logger = logging.getLogger(__name__)
@@ -146,7 +144,6 @@ class OPC_UA:
class Config:
arbitrary_types_allowed = True
-
def __init__(self, rest_url: AnyUrl, opcua_url: AnyUrl, namespaces: List = None, auth_client: object = None, session: requests.Session = None):
"""Class initializer
@@ -469,70 +466,31 @@ def get_historical_aggregated_values(self,
}
return self._process_df(df_result, columns)
- async def _fetch_data_async(self, endpoint: str, body: Dict, max_retries:int, retry_delay:int) -> pd.DataFrame:
- """
- Fetch data from the API and return it as a DataFrame.
- """
- for attempt in range(max_retries):
- try:
- async with ClientSession() as session:
- async with session.post(
- url=self.rest_url + endpoint,
- json=body,
- headers=self.headers
- ) as response:
- response.raise_for_status()
- content = await response.json()
- except aiohttp.ClientError as e:
- if attempt < max_retries - 1:
- wait_time = retry_delay * (2 ** attempt)
- logger.warning(f"Request failed. Retrying in {wait_time} seconds...")
- await asyncio.sleep(wait_time)
- else:
- logger.error(f"Max retries reached. Error: {e}")
- raise RuntimeError(f'Error message {e}')
- self._check_content(content)
-
- df_list = []
- for item in content["HistoryReadResults"]:
- df = pd.json_normalize(item["DataValues"])
- for key, value in item["NodeId"].items():
- df[f"HistoryReadResults.NodeId.{key}"] = value
- df_list.append(df)
-
- if df_list:
- df_result = pd.concat(df_list)
- df_result.reset_index(inplace=True, drop=True)
- return df_result
-
- return df_result
-
- async def get_historical_aggregated_values_async(
- self,
- start_time: datetime,
- end_time: datetime,
- pro_interval: int,
- agg_name: str,
- variable_list: List[Variables],
- max_data_points: int = 10000,
- max_retries: int = 3,
- retry_delay: int = 5,
- max_concurrent_requests: int = 10
+ async def get_raw_historical_values_async(
+ self,
+ start_time: datetime,
+ end_time: datetime,
+ variable_list: list,
+ limit_start_index: Union[int, None] = None,
+ limit_num_records: Union[int, None] = None,
+ max_data_points: int = 10000,
+ max_retries: int = 3,
+ retry_delay: int = 5,
+ max_concurrent_requests: int = 35
) -> pd.DataFrame:
+
"""Request historical aggregated values from the OPC UA server with batching"""
total_time_range_ms = (end_time - start_time).total_seconds() * 1000
- estimated_intervals = total_time_range_ms / pro_interval
-
+ estimated_intervals = total_time_range_ms / max_data_points
+
max_variables_per_batch = max(1, int(max_data_points / estimated_intervals))
max_time_batches = max(1, int(estimated_intervals / max_data_points))
time_batch_size_ms = total_time_range_ms / max_time_batches
- extended_variables = [{"NodeId": var, "AggregateName": agg_name} for var in variable_list]
- variable_batches = [
- extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)
- ]
+ extended_variables = [{"NodeId": var} for var in variable_list]
+ variable_batches = [extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)]
all_results = []
semaphore = Semaphore(max_concurrent_requests)
@@ -544,18 +502,49 @@ async def process_batch(variables, time_batch):
batch_start = start_time + timedelta(milliseconds=batch_start_ms)
batch_end = start_time + timedelta(milliseconds=batch_end_ms)
- additional_params = {
- "ProcessingInterval": pro_interval,
- "AggregateName": agg_name
+ body = {
+ **self.body,
+ "StartTime": batch_start.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
+ "EndTime": batch_end.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
+ "ReadValueIds": variables,
}
- body = self._prepare_body(
- batch_start,
- batch_end,
- variables,
- additional_params
- )
- df_result = await self._fetch_data_async("values/historicalaggregated", body, max_retries, retry_delay)
- return df_result
+
+ if limit_start_index is not None and limit_num_records is not None:
+ body["Limit"] = {"StartIndex": limit_start_index, "NumRecords": limit_num_records}
+
+ for attempt in range(max_retries):
+ try:
+ async with ClientSession() as session:
+ async with session.post(
+ f"{self.rest_url}values/historical",
+ json=body,
+ headers=self.headers
+ ) as response:
+ response.raise_for_status()
+ content = await response.json()
+ break
+ except aiohttp.ClientError as e:
+ if attempt < max_retries - 1:
+ wait_time = retry_delay * (2 ** attempt)
+ logger.warning(f"Request failed. Retrying in {wait_time} seconds...")
+ await asyncio.sleep(wait_time)
+ else:
+ logger.error(f"Max retries reached. Error: {e}")
+ raise RuntimeError(f'Error message {e}')
+
+ self._check_content(content)
+
+ df_list = []
+ for item in content["HistoryReadResults"]:
+ df = pd.json_normalize(item["DataValues"])
+ for key, value in item["NodeId"].items():
+ df[f"HistoryReadResults.NodeId.{key}"] = value
+ df_list.append(df)
+
+ if df_list:
+ df_result = pd.concat(df_list)
+ df_result.reset_index(inplace=True, drop=True)
+ return df_result
tasks = [
process_batch(variables, time_batch)
@@ -567,35 +556,33 @@ async def process_batch(variables, time_batch):
all_results.extend(results)
logger.info("Combining all batches...")
- combined_df = pd.concat(results, ignore_index=True)
-
+ combined_df = pd.concat(all_results, ignore_index=True)
columns = {
- "Value.Type": "ValueType",
- "Value.Body": "Value",
- "StatusCode.Symbol": "StatusSymbol",
- "StatusCode.Code": "StatusCode",
- "SourceTimestamp": "Timestamp",
- "HistoryReadResults.NodeId.IdType": "IdType",
- "HistoryReadResults.NodeId.Id": "Id",
- "HistoryReadResults.NodeId.Namespace": "Namespace",
+ "Value.Type": "ValueType",
+ "Value.Body": "Value",
+ "StatusCode.Symbol": "StatusSymbol",
+ "StatusCode.Code": "StatusCode",
+ "SourceTimestamp": "Timestamp",
+ "HistoryReadResults.NodeId.IdType": "IdType",
+ "HistoryReadResults.NodeId.Id": "Id",
+ "HistoryReadResults.NodeId.Namespace": "Namespace",
}
return self._process_df(combined_df, columns)
-
- async def get_historical_aggregated_values_batch_time_vars_async(
+
+ async def get_historical_aggregated_values_async(
self,
start_time: datetime,
end_time: datetime,
pro_interval: int,
agg_name: str,
- variable_list: List[str],
- max_data_points: int = 100000,
+ variable_list: list,
+ max_data_points: int = 10000,
max_retries: int = 3,
retry_delay: int = 5,
- max_concurrent_requests: int = 50
+ max_concurrent_requests: int = 35
) -> pd.DataFrame:
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
+ """Request historical aggregated values from the OPC UA server with batching"""
total_time_range_ms = (end_time - start_time).total_seconds() * 1000
estimated_intervals = total_time_range_ms / pro_interval
@@ -604,14 +591,15 @@ async def get_historical_aggregated_values_batch_time_vars_async(
max_time_batches = max(1, int(estimated_intervals / max_data_points))
time_batch_size_ms = total_time_range_ms / max_time_batches
- extended_variables = [{"NodeId": var, "AggregateName": agg_name} for var in variable_list]
+ extended_variables = [
+ {"NodeId": var, "AggregateName": agg_name} for var in variable_list
+ ]
variable_batches = [
extended_variables[i:i + max_variables_per_batch] for i in range(0, len(extended_variables), max_variables_per_batch)
]
all_results = []
semaphore = Semaphore(max_concurrent_requests)
- client_pool = ClientPool(max_concurrent_requests, self.rest_url, self.headers)
async def process_batch(variables, time_batch):
async with semaphore:
@@ -628,18 +616,18 @@ async def process_batch(variables, time_batch):
"ReadValueIds": variables,
"AggregateName": agg_name
}
-
for attempt in range(max_retries):
try:
- content = await request_from_api_async(
- client_pool,
- method="POST",
- endpoint=f"/values/historicalaggregated",
- data=json.dumps(body, default=self.json_serial),
- extended_timeout=True
- )
- break
- except (aiohttp.ClientError, ValidationError) as e:
+ async with ClientSession() as session:
+ async with session.post(
+ f"{self.rest_url}values/historicalaggregated",
+ json=body,
+ headers=self.headers
+ ) as response:
+ response.raise_for_status()
+ content = await response.json()
+ break
+ except aiohttp.ClientError as e:
if attempt < max_retries - 1:
wait_time = retry_delay * (2 ** attempt)
logger.warning(f"Request failed. Retrying in {wait_time} seconds...")
@@ -668,25 +656,24 @@ async def process_batch(variables, time_batch):
for time_batch in range(max_time_batches)
]
- try:
- results = await asyncio.gather(*tasks)
- all_results.extend(results)
-
- logger.info("Combining all batches...")
- combined_df = pd.concat(all_results, ignore_index=True)
- columns = {
- "Value.Type": "ValueType",
- "Value.Body": "Value",
- "StatusCode.Symbol": "StatusSymbol",
- "StatusCode.Code": "StatusCode",
- "SourceTimestamp": "Timestamp",
- "HistoryReadResults.NodeId.IdType": "IdType",
- "HistoryReadResults.NodeId.Id": "Id",
- "HistoryReadResults.NodeId.Namespace": "Namespace",
- }
- return self._process_df(combined_df, columns)
- finally:
- await client_pool.close_all()
+ results = await asyncio.gather(*tasks)
+ all_results.extend(results)
+
+ logger.info("Combining all batches...")
+ combined_df = pd.concat(results, ignore_index=True)
+
+ columns = {
+ "Value.Type": "ValueType",
+ "Value.Body": "Value",
+ "StatusCode.Symbol": "StatusSymbol",
+ "StatusCode.Code": "StatusCode",
+ "SourceTimestamp": "Timestamp",
+ "HistoryReadResults.NodeId.IdType": "IdType",
+ "HistoryReadResults.NodeId.Id": "Id",
+ "HistoryReadResults.NodeId.Namespace": "Namespace",
+ }
+ return self._process_df(combined_df, columns)
+
def write_values(self, variable_list: List[WriteVariables]) -> List:
"""Request to write realtime values to the OPC UA server
diff --git a/src/pyprediktormapclient/shared.py b/src/pyprediktormapclient/shared.py
index b111ce1..09511f9 100644
--- a/src/pyprediktormapclient/shared.py
+++ b/src/pyprediktormapclient/shared.py
@@ -7,25 +7,6 @@
class Config:
arbitrary_types_allowed = True
-class ClientPool:
- def __init__(self, num_clients: int, rest_url: str, headers: Dict[str, str]):
- self.clients = asyncio.Queue()
- for _ in range(num_clients):
- self.clients.put_nowait(aiohttp.ClientSession(base_url=rest_url, headers=headers))
- self.num_clients = num_clients
-
- async def get_client(self):
- client = await self.clients.get()
- return client
-
- async def release_client(self, client):
- await self.clients.put(client)
-
- async def close_all(self):
- while not self.clients.empty():
- client = await self.clients.get()
- await client.close()
-
def request_from_api(
rest_url: AnyUrl,
method: Literal["GET", "POST"],
@@ -71,34 +52,3 @@ def request_from_api(
else:
return {"error": "Non-JSON response", "content": result.text}
-
-async def request_from_api_async(
- client_pool: ClientPool,
- method: str,
- endpoint: str,
- data: str = None,
- params: Dict[str, Any] = None,
- extended_timeout: bool = False,
-) -> Dict[str, Any]:
- timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)
- client = await client_pool.get_client()
-
- try:
- if method == "GET":
- async with client.get(endpoint, params=params, timeout=timeout) as response:
- response.raise_for_status()
- if 'application/json' in response.headers.get('Content-Type', ''):
- return await response.json()
- else:
- return {"error": "Non-JSON response", "content": await response.text()}
- elif method == "POST":
- async with client.post(endpoint, data=data, params=params, timeout=timeout) as response:
- response.raise_for_status()
- if 'application/json' in response.headers.get('Content-Type', ''):
- return await response.json()
- else:
- return {"error": "Non-JSON response", "content": await response.text()}
- else:
- raise ValidationError("Unsupported method")
- finally:
- await client_pool.release_client(client)
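
With ClientPool and request_from_api_async removed, each async call in opc_ua.py now opens its own aiohttp.ClientSession (see the session.post blocks above). The sketch below, not part of this diff, folds that per-request pattern together with the timeout and Content-Type handling the removed helper provided; post_json is an illustrative name, not a function in this package.

```python
import aiohttp

async def post_json(rest_url: str, endpoint: str, body: dict, headers: dict,
                    extended_timeout: bool = False) -> dict:
    """POST a JSON body and return the decoded response, mirroring the removed helper."""
    timeout = aiohttp.ClientTimeout(total=300 if extended_timeout else 30)
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{rest_url}{endpoint}", json=body,
                                headers=headers, timeout=timeout) as response:
            response.raise_for_status()
            if "application/json" in response.headers.get("Content-Type", ""):
                return await response.json()
            return {"error": "Non-JSON response", "content": await response.text()}
```

Opening a session per request is simpler than pooling, at the cost of establishing a new connection for every call; because each session is used as a context manager, it also avoids the "Unclosed client session" errors visible in the deleted notebook's output.
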