From 0fc7b3f9d0beaf583ebbcf86a2cc5953e26fbb17 Mon Sep 17 00:00:00 2001 From: oscgonfer Date: Mon, 23 Oct 2023 12:45:27 +0200 Subject: [PATCH] Move to asyncio for speed --- .github/workflows/python-app.yml | 60 ++ setup.cfg | 2 +- setup.py | 13 +- smartcitizen_connector/__init__.py | 17 + smartcitizen_connector/config/__init__.py | 1 + smartcitizen_connector/config/config.py | 8 + smartcitizen_connector/device/__init__.py | 1 + smartcitizen_connector/device/device.py | 354 ++++++++ smartcitizen_connector/models/__init__.py | 2 + smartcitizen_connector/models/models.py | 87 ++ smartcitizen_connector/utils/__init__.py | 1 + smartcitizen_connector/utils/utils.py | 139 +++ src/smartcitizen_connector/__init__.py | 12 - src/smartcitizen_connector/models.py | 67 -- .../smartcitizen_connector.py | 817 ------------------ tests/connector/test_connector.py | 32 + 16 files changed, 709 insertions(+), 904 deletions(-) create mode 100644 .github/workflows/python-app.yml create mode 100644 smartcitizen_connector/__init__.py create mode 100644 smartcitizen_connector/config/__init__.py create mode 100644 smartcitizen_connector/config/config.py create mode 100644 smartcitizen_connector/device/__init__.py create mode 100644 smartcitizen_connector/device/device.py create mode 100644 smartcitizen_connector/models/__init__.py create mode 100644 smartcitizen_connector/models/models.py create mode 100644 smartcitizen_connector/utils/__init__.py create mode 100644 smartcitizen_connector/utils/utils.py delete mode 100644 src/smartcitizen_connector/__init__.py delete mode 100644 src/smartcitizen_connector/models.py delete mode 100644 src/smartcitizen_connector/smartcitizen_connector.py create mode 100644 tests/connector/test_connector.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 0000000..0a0d3be --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,60 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9"] + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Display Python version + run: python -c "import sys; print(sys.version)" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + pip install . + - name: Test with pytest + run: | + pytest + - name: Build package + run: | + python setup.py sdist bdist_wheel + - name: Archive artifacts + uses: actions/upload-artifact@v1 + with: + name: smartcitizen-connector-pkg + path: dist + pypi-publish: + name: Upload release to PyPI + if: startsWith(github.ref, 'refs/tags/') + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/smartcitizen-connector + permissions: + id-token: write + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/setup.cfg b/setup.cfg index 224a779..0f94f37 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [metadata] -description-file = README.md \ No newline at end of file +description_file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py index 140cf0c..fbdd9e1 100644 --- a/setup.py +++ b/setup.py @@ -16,21 +16,18 @@ setup( name="smartcitizen-connector", - version="0.2.0", + version="0.3.0", description="Python connector to download information collected in SmartCitizen API", - author="Óscar González", + author="oscgonfer", license="GNU General Public License v3", keywords=['sensors', 'Smart Citizen'], long_description = open('README.md').read(), long_description_content_type="text/markdown", url="https://github.com/fablabbcn/smartcitizen-connector", - packages=find_packages("src"), - package_dir={"": "src"}, project_urls=PROJECT_URLS, - py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], python_requires=">=3.6", classifiers=[ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", 'Intended Audience :: Education', 'Intended Audience :: Science/Research', 'Intended Audience :: Developers', @@ -44,5 +41,7 @@ "Topic :: Utilities", "Natural Language :: English", ], - install_requires=["pydantic", "requests", "pandas", "timezonefinder", "urllib3"], + install_requires=["pydantic", "requests", "pandas", "timezonefinder", "urllib3", "aiohttp"], + setup_requires=['wheel'], + zip_safe=False ) diff --git a/smartcitizen_connector/__init__.py b/smartcitizen_connector/__init__.py new file mode 100644 index 0000000..9df8f6b --- /dev/null +++ b/smartcitizen_connector/__init__.py @@ -0,0 +1,17 @@ +from .models import (Sensor, Measurement, Kit, Owner, Location, + HardwareInfo, Postprocessing, Data, Device) +from .device import SCDevice + +__all__ = [ + "Device", + "Kit", + "Sensor", + "Measurement", + "Owner", + "Location", + "Data", + "Postprocessing", + "HardwareInfo" + ] + +__version__ = '0.3.0' \ No newline at end of file diff --git a/smartcitizen_connector/config/__init__.py b/smartcitizen_connector/config/__init__.py new file mode 100644 index 0000000..d085c3a --- /dev/null +++ b/smartcitizen_connector/config/__init__.py @@ -0,0 +1 @@ +from .config import * \ No newline at end of file diff --git a/smartcitizen_connector/config/config.py b/smartcitizen_connector/config/config.py new file mode 100644 index 0000000..2218de3 --- /dev/null +++ b/smartcitizen_connector/config/config.py @@ -0,0 +1,8 @@ +# Output config +out_level = 'DEBUG' +out_timestamp = True +# Base URL for all methods +API_URL = 'https://api.smartcitizen.me/v0/' +DEVICES_URL = API_URL + 'devices/' +FRONTED_URL = 'https://smartcitizen.me/kits' +BASE_POSTPROCESSING_URL='https://raw.githubusercontent.com/fablabbcn/smartcitizen-data/master/' diff --git a/smartcitizen_connector/device/__init__.py b/smartcitizen_connector/device/__init__.py new file mode 100644 index 0000000..536e5a5 --- /dev/null +++ b/smartcitizen_connector/device/__init__.py @@ -0,0 +1 @@ +from .device import SCDevice \ No newline at end of file diff --git a/smartcitizen_connector/device/device.py b/smartcitizen_connector/device/device.py new file mode 100644 index 0000000..5e7fd1d --- /dev/null +++ b/smartcitizen_connector/device/device.py @@ -0,0 +1,354 @@ +from smartcitizen_connector.models import (Sensor, Kit, Owner, Location, Data, Device, + Measurement, Postprocessing, HardwareInfo) +from smartcitizen_connector.config import * +from smartcitizen_connector.utils import * +from typing import Optional, List +from requests import get +from pandas import DataFrame, to_datetime +from datetime import datetime +from os import environ +from pydantic import TypeAdapter +import aiohttp +import asyncio + +class SCDevice: + id: int + url: str + timezone: str + json: Device + data: DataFrame + + def __init__(self, id): + self.id = id + self.url = f'{DEVICES_URL}{self.id}' + r = self.__safe_get__(self.url) + self.json = TypeAdapter(Device).validate_python(r.json()) + self.__get_timezone__() + self.__check_postprocessing__() + + def __safe_get__(self, url): + r = get(url) + r.raise_for_status() + + return r + + def __get_timezone__(self) -> str: + + if self.json.data.location.latitude is not None and self.json.data.location.longitude is not None: + self.timezone = tf.timezone_at(lng=self.json.data.location.longitude, lat=self.json.data.location.latitude) + + std_out ('Device {} timezone is {}'.format(self.id, self.timezone)) + + return self.timezone + + def __check_postprocessing__(self) -> dict: + + if self.json.postprocessing is not None: + # Check the url in hardware + urls = url_checker(self.json.postprocessing.hardware_url) + # If URL is empty, try prepending base url from config + if not urls: + tentative_url = f"{BASE_POSTPROCESSING_URL}hardware/{self.json.postprocessing.hardware_url}.json" + else: + if len(urls)>1: std_out('URLs for postprocessing recipe are more than one, trying first', 'WARNING') + tentative_url = urls[0] + + self.json.postprocessing.hardware_url = tentative_url + + std_out (f'Device {self.id} has postprocessing information:\n{self.json.postprocessing}') + else: + std_out (f'Device {self.id} has no postprocessing information') + + return self.json.postprocessing + + async def get_datum(self, session, url, headers, sensor_id): + async with session.get(url, headers = headers) as resp: + data = await resp.json() + + if data['readings'] == []: + std_out(f'No data in request for sensor: {sensor_id}', 'WARNING') + return None + + return {sensor_id: data} + + async def get_data(self, + min_date: Optional[datetime] = None, + max_date: Optional[datetime] = None, + rollup: Optional[str] = '1h', + clean_na: Optional[str] = None, + resample: Optional[bool] = False)->DataFrame: + + if 'SC_ADMIN_BEARER' in environ: + std_out('Admin Bearer found, using it', 'SUCCESS') + + headers = {'Authorization':'Bearer ' + environ['SC_ADMIN_BEARER']} + else: + headers = None + std_out('Admin Bearer not found', 'WARNING') + + std_out(f'Requesting data from SC API') + std_out(f'Device ID: {self.id}') + std_out(f'Using rollup: {rollup}') + + if self.timezone is None: + std_out('Device does not have timezone set, skipping', 'WARNING') + return None + + # Check start date and end date + # Converting to UTC by passing None + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_convert.html + if min_date is not None: + min_date = localise_date(to_datetime(min_date), 'UTC') + std_out (f'Min Date: {min_date}') + else: + min_date = localise_date(to_datetime('2001-01-01'), 'UTC') + std_out(f"No min_date specified") + + if max_date is not None: + max_date = localise_date(to_datetime(max_date), 'UTC') + std_out (f'Max Date: {max_date}') + + # Trim based on actual data available + if min_date is not None and self.json.last_reading_at is not None: + if min_date > self.json.last_reading_at: + std_out(f'Device request would yield empty data (min_date). Returning', 'WARNING') + return None + + if max_date is not None and self.json.created_at is not None: + if max_date < self.json.created_at: + std_out(f'Device request would yield empty data (max_date). Returning', 'WARNING') + return None + + if max_date is not None and self.json.last_reading_at is not None: + if max_date > self.json.last_reading_at: + std_out('Trimming max_date to last reading', 'WARNING') + max_date = self.json.last_reading_at + + if self.json.kit is not None: + std_out('Kit ID: {}'.format(self.json.kit.id)) + std_out(f'Device timezone: {self.timezone}') + + if not self.json.data.sensors: + std_out(f'Device is empty') + return None + else: std_out(f"Sensor IDs: {[(sensor.id, sensor.name) for sensor in self.json.data.sensors]}") + + df = DataFrame() + std_out(f'Requesting from {min_date} to {max_date}') + + async with aiohttp.ClientSession() as session: + + tasks = [] + for sensor in self.json.data.sensors: + print (sensor) + + # Request sensor per ID + url = self.url + '/readings?' + + if min_date is not None: url += f'from={min_date}' + if max_date is not None: url += f'&to={max_date}' + + url += f'&rollup={rollup}' + url += f'&sensor_id={sensor.id}' + url += '&function=avg' + + tasks.append(asyncio.ensure_future(self.get_datum(session, url, headers, sensor.id))) + + data = await asyncio.gather(*tasks) + + for datum in data: + if datum is None: continue + sensors = self.json.data.sensors + sensor_id = list(datum.keys())[0] + sensor_name = '' + # Find the id of the sensor + for sensor in sensors: + if sensor.id == sensor_id: + sensor_name = sensor.name + break + + df_sensor = DataFrame(datum[sensor_id]['readings']).set_index(0) + df_sensor.columns = [sensor_name] + df_sensor.index = localise_date(df_sensor.index, self.timezone) + df_sensor.sort_index(inplace=True) + df_sensor = df_sensor[~df_sensor.index.duplicated(keep='first')] + + # Drop unnecessary columns + df_sensor.drop([i for i in df_sensor.columns if 'Unnamed' in i], axis=1, inplace=True) + # Check for weird things in the data + df_sensor = df_sensor.astype(float, errors='ignore') + # Resample + if (resample): + df_sensor = df_sensor.resample(frequency).mean() + + df = df.combine_first(df_sensor) + + try: + df = df.reindex(df.index.rename('TIME')) + df = clean(df, clean_na, how = 'all') + self.data = df + except: + std_out('Problem closing up the API dataframe', 'ERROR') + pass + return None + + std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS') + return self.data + + # @staticmethod + # def get_devices( + # owner_username: Optional[str] = None, + # kit_id: Optional[int] = None, + # city: Optional[str] = None, + # tags: Optional[list] = None, + # tag_method: Optional[str] = 'any', + # full: Optional[bool] = False, + # ) -> List[DeviceSummary]: + # """ + # Gets devices from Smart Citizen API with certain requirements + # Parameters + # ---------- + # user: string + # None + # Username + # kit_id: integer + # None + # Kit ID + # city: string, optional + # Empty string + # City + # tags: list of strings + # None + # Tags for the device (system or user). Default system wide are: indoor, outdoor, online, and offline + # tag_method: string + # 'any' + # 'any' or 'all'. Checks if 'all' the tags are to be included in the tags or it could be any + # full: bool + # False + # Returns a list with if False, or the whole dataframe if True + # Returns + # ------- + # A list of kit IDs that comply with the requirements, or the full df, depending on full. + # If no requirements are set, returns all of them + # """ + + # world_map = get(API_URL + 'devices/world_map') + # df = DataFrame(world_map.json()) + # df = df.dropna(axis=0, how='any') + # df['kit_id'] = df['kit_id'].astype(int) + + # # Location + # if owner_username is not None: df=df[(df['owner_username']==owner_username)] + # if kit_id is not None: df=df[(df['kit_id']==kit_id)] + # if city is not None: df=df[(df['city']==city)] + + # # Tags + # if tags is not None: + # if tag_method == 'any': + # df['has_tags'] = df.apply(lambda x: any(tag in x['system_tags']+x['user_tags'] for tag in tags), axis=1) + # elif tag_method == 'all': + # df['has_tags'] = df.apply(lambda x: all(tag in x['system_tags']+x['user_tags'] for tag in tags), axis=1) + # df=df[(df['has_tags']==True)] + + # return [DeviceSummary(**d) for d in df.to_dict(orient='records')] + + # @staticmethod + # def global_search(value: Optional[str] = None) -> DataFrame: + # """ + # Gets devices from Smart Citizen API based on basic search query values, + # searching both Users and Devices at the same time. + # Global search documentation: https://developer.smartcitizen.me/#global-search + # Parameters + # ---------- + # value: string + # None + # Query to fit + # For null, not_null values, use 'null' or 'not_null' + # Returns + # ------- + # A list of kit IDs that comply with the requirements, or the full df, depending on full. + # """ + + # API_SEARCH_URL = API_URL + "search?q=" + + # # Value check + # if value is None: std_out(f'Value needs a value, {value} supplied', 'ERROR'); return None + + # url = API_SEARCH_URL + f'{value}' + + # df = DataFrame() + # isn = True + # while isn: + # try: + # r = get(url) + # # If status code OK, retrieve data + # if r.status_code == 200 or r.status_code == 201: + # h = process_headers(r.headers) + # df = df.combine_first(DataFrame(r.json()).set_index('id')) + # else: + # std_out('API reported {}'.format(r.status_code), 'ERROR') + # except: + # std_out('Failed request. Probably no connection', 'ERROR') + # pass + + # if 'next' in h: + # if h['next'] == url: isn = False + # elif h['next'] != url: url = h['next'] + # else: + # isn = False + + # return df + + # @staticmethod + # def search_by_query(key: Optional[str] = '', value: Optional[str] = None) -> DataFrame: + # """ + # Gets devices from Smart Citizen API based on ransack parameters + # Basic query documentation: https://developer.smartcitizen.me/#basic-searching + # Parameters + # ---------- + # key: string + # '' + # Query key according to the basic query documentation. Some (not all) parameters are: + # ['id', 'owner_id', 'name', 'description', 'mac_address', 'created_at', + # 'updated_at', 'kit_id', 'geohash', 'last_recorded_at', 'uuid', 'state', + # 'postprocessing_id', 'hardware_info'] + # value: string + # None + # Query to fit + # For null, not_null values, use 'null' or 'not_null' + # Returns + # ------- + # A list of kit IDs that comply with the requirements, or the full df, depending on full. + # """ + + # API_BASE_URL= API_URL + 'devices/' + + # # Value check + # if value is None: std_out(f'Value needs a value, {value} supplied', 'ERROR'); return None + + # if value == 'null' or value == 'not_null': + # url = API_BASE_URL + f'?q[{key}_{value}]=1' + # else: + # url = API_BASE_URL + f'?q[{key}]={value}' + + # df = DataFrame() + # isn = True + # while isn: + # try: + # r = get(url) + # # If status code OK, retrieve data + # if r.status_code == 200 or r.status_code == 201: + # h = process_headers(r.headers) + # df = df.combine_first(DataFrame(r.json()).set_index('id')) + # else: + # std_out('API reported {}'.format(r.status_code), 'ERROR') + # except: + # std_out('Failed request. Probably no connection', 'ERROR') + # pass + + # if 'next' in h: + # if h['next'] == url: isn = False + # elif h['next'] != url: url = h['next'] + # else: + # isn = False + # return df diff --git a/smartcitizen_connector/models/__init__.py b/smartcitizen_connector/models/__init__.py new file mode 100644 index 0000000..b51f13e --- /dev/null +++ b/smartcitizen_connector/models/__init__.py @@ -0,0 +1,2 @@ +from .models import (Sensor, Measurement, Kit, Owner, Location, + HardwareInfo, Postprocessing, Data, Device) \ No newline at end of file diff --git a/smartcitizen_connector/models/models.py b/smartcitizen_connector/models/models.py new file mode 100644 index 0000000..184361e --- /dev/null +++ b/smartcitizen_connector/models/models.py @@ -0,0 +1,87 @@ +from datetime import datetime +from typing import Optional, List +from pydantic import BaseModel + +class Sensor(BaseModel): + id: int + name: str + description: str + unit: str + value: Optional[float] = None + prev_value: Optional[float] = None + last_reading_at: Optional[datetime] = None + +class Measurement(BaseModel): + id: int + name: str + description: str + +class Kit(BaseModel): + id: int + slug: str + name: str + description: str + created_at: datetime + updated_at: datetime + sensors: Optional[List[Sensor]] = None + +class Owner(BaseModel): + id: int + username: str + role: Optional[str] = "" + devices: Optional[List[str]] = None + +class Location(BaseModel): + city: str + country_code: str + country: str + exposure: Optional[str] = None + elevation: Optional[float] = None + geohash: Optional[str] = None + latitude: Optional[float] = None + longitude: Optional[float] = None + +class HardwareInfo(BaseModel): + id: str + mac: str + time: str + esp_bd: str + hw_ver: str + sam_bd: str + esp_ver: str + sam_ver: str + rcause: Optional['str'] = None + +class Postprocessing(BaseModel): + id: int + blueprint_url: Optional['str'] = None + hardware_url: Optional['str'] = None + forwarding_params: Optional['str'] = None + meta: Optional['str'] = None + latest_postprocessing: datetime + created_at: datetime + updated_at: datetime + +class Data(BaseModel): + location: Optional[Location]= None + sensors: Optional[List[Sensor]] = None + +class Device(BaseModel): + id: int + uuid: str + name: str + description: str + state: str + postprocessing: Optional[Postprocessing] = None + hardware_info: HardwareInfo + system_tags: List[str] + user_tags: List[str] + is_private: bool + notify_low_battery: bool + notify_stopped_publishing: bool + last_reading_at: datetime + created_at: Optional['datetime'] = None + updated_at: datetime + owner: Owner + data: Data + kit: Optional[Kit] = None diff --git a/smartcitizen_connector/utils/__init__.py b/smartcitizen_connector/utils/__init__.py new file mode 100644 index 0000000..90f60fd --- /dev/null +++ b/smartcitizen_connector/utils/__init__.py @@ -0,0 +1 @@ +from .utils import * \ No newline at end of file diff --git a/smartcitizen_connector/utils/utils.py b/smartcitizen_connector/utils/utils.py new file mode 100644 index 0000000..1713d8c --- /dev/null +++ b/smartcitizen_connector/utils/utils.py @@ -0,0 +1,139 @@ +from pandas import to_datetime +from timezonefinder import TimezoneFinder +from datetime import datetime +from smartcitizen_connector.config import * +from typing import Optional + +tf = TimezoneFinder() + +# Convertion between SC and Pandas API rollups +rollup_table = { + "y": "years", + "M": "months", + "w": "weeks", + "d": "days", + "h": "hours", + "m": "minutes", + "s": "seconds", + "ms": "milliseconds" +} + +rollup_2_freq_lut = ( + ['A', 'y'], + ['M', 'M'], + ['W', 'w'], + ['D', 'd'], + ['H', 'h'], + ['Min', 'm'], + ['S', 's'], + ['ms', 'ms'] +) + +def clean(df, clean_na = None, how = 'all'): + """ + Helper function for cleaning nan in a pandas. Parameters + ---------- + df: pandas. The o clean + clean_na: None or string + type of nan cleaning. If not None, can be 'drop' or 'fill' + how: 'string' + Same as how in dropna, fillna. Can be 'any', or 'all' + Returns + ------- + Clean dataframe + """ + + if clean_na is not None: + if clean_na == 'drop': + df.dropna(axis = 0, how = how, inplace = True) + elif clean_na == 'fill': + df = df.fillna(method = 'bfill').fillna(method = 'ffill') + return df + +def convert_rollup_to_freq(rollup): + # Convert frequency from pandas to API's + for index, letter in enumerate(rollup): + try: + aux = int(letter) + except: + index_first = index + letter_first = letter + frequency_value = rollup[:index_first] + rollup_unit = rollup[index_first:] + break + + for item in rollup_2_freq_lut: + if item[1] == rollup_unit: + frequency_unit = item[0] + break + + frequency = frequency_value + frequency_unit + return frequency + +def localise_date(date, timezone, tzaware=True): + """ + Localises a date if it's tzinfo is None, otherwise converts it to it. + If the timestamp is tz-aware, converts it as well + Parameters + ---------- + date: string or datetime + Date + timezone: string + Timezone string. i.e.: 'Europe/Madrid' + Returns + ------- + The date converted to 'UTC' and localised based on the timezone + """ + if date is not None: + # Per default, we consider that timestamps are tz-aware or UTC. + # If not, preprocessing should be done to get there + result_date = to_datetime(date, utc = tzaware) + if result_date.tzinfo is not None: + result_date = result_date.tz_convert(timezone) + else: + result_date = result_date.tz_localize(timezone) + else: + result_date = None + + return result_date + +def std_out(msg: str, + mtype: Optional[str] = None, + force: Optional[bool] = False + ): + + if out_timestamp == True: + stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + else: + stamp = '' + # Output levels: + # 'QUIET': nothing, + # 'NORMAL': warn, err + # 'DEBUG': info, warn, err, success + if force == True: priority = 2 + elif out_level == 'QUIET': priority = 0 + elif out_level == 'NORMAL': priority = 1 + elif out_level == 'DEBUG': priority = 2 + + if mtype is None and priority>1: + print(f'[{stamp}] - ' + '[INFO] ' + msg) + elif mtype == 'SUCCESS' and priority>0: + print(f'[{stamp}] - ' + '[SUCCESS] ' + msg) + elif mtype == 'WARNING' and priority>0: + print(f'[{stamp}] - ' + '[WARNING] ' + msg) + elif mtype == 'ERROR' and priority>0: + print(f'[{stamp}] - ' + '[ERROR] ' + msg) + +import re + +''' Directly from +https://www.geeksforgeeks.org/python-check-url-string/ +''' + +def url_checker(string): + if string is not None: + regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))" + url = re.findall(regex,string) + return [x[0] for x in url] + else: + return [] \ No newline at end of file diff --git a/src/smartcitizen_connector/__init__.py b/src/smartcitizen_connector/__init__.py deleted file mode 100644 index 1ceb4af..0000000 --- a/src/smartcitizen_connector/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from .models import Device, DeviceSummary, Kit, Sensor, Owner, Location, Data -from .smartcitizen_connector import (ScApiDevice, std_out, rollup_table, localise_date) - -__all__ = [ - "Device", - "DeviceSummary", - "Kit", - "Sensor", - "Owner", - "Location", - "Data" - ] diff --git a/src/smartcitizen_connector/models.py b/src/smartcitizen_connector/models.py deleted file mode 100644 index 79a3848..0000000 --- a/src/smartcitizen_connector/models.py +++ /dev/null @@ -1,67 +0,0 @@ -from datetime import datetime -from typing import Optional, List -from pydantic import BaseModel - -class Sensor(BaseModel): - id: int - name: Optional[str] = None - description: Optional[str] = None - unit: Optional[str] = None - -class Kit(BaseModel): - id: int - slug: Optional[str] = None - name: Optional[str] = None - description: Optional[str] = None - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - sensors: Optional[List[Sensor]] = None - -class Owner(BaseModel): - id: int - username: Optional[str] = None - username: Optional[str] = None - -class Location(BaseModel): - latitude: Optional[float] = None - longitude: Optional[float] = None - city: Optional[str] = None - country_code: Optional[str] = None - -class Data(BaseModel): - location: Optional[Location]= None - sensors: Optional[List[Sensor]] = None - -class DeviceSummary(BaseModel): - id: int - name: Optional[str] = None - description: Optional[str] = None - added_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - last_reading_at: Optional[datetime] = None - owner_id: Optional[int] = None - owner_username: Optional[str] = None - user_tags: Optional[List] = None - system_tags: Optional[List] = None - state: Optional[str] = None - kit_id: Optional[float] = None - latitude: Optional[float] = None - longitude: Optional[float] = None - city: Optional[str] = None - country_code: Optional[str] = None - -class Device(BaseModel): - id: int - name: Optional[str] = None - description: Optional[str] = None - added_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - last_reading_at: Optional[datetime] = None - data: Optional[Data] = None - owner: Optional[Owner] = None - owner_id: Optional[int] = None - owner_username: Optional[str] = None - user_tags: Optional[List] = None - system_tags: Optional[List] = None - state: Optional[str] = None - kit: Optional[Kit] = None diff --git a/src/smartcitizen_connector/smartcitizen_connector.py b/src/smartcitizen_connector/smartcitizen_connector.py deleted file mode 100644 index 1a39835..0000000 --- a/src/smartcitizen_connector/smartcitizen_connector.py +++ /dev/null @@ -1,817 +0,0 @@ -#!/usr/bin/env python3 -from .models import Sensor, Kit, Owner, Location, Data, Device, DeviceSummary -from typing import Optional, List -from requests import get -import urllib3 -from pandas import DataFrame, to_datetime -from timezonefinder import TimezoneFinder -from datetime import datetime - -tf = TimezoneFinder() - -urllib3.disable_warnings() - -rollup_table = { - "y": "years", - "M": "months", - "w": "weeks", - "d": "days", - "h": "hours", - "m": "minutes", - "s": "seconds", - "ms": "milliseconds" -} - -rollup_2_freq_lut = ( - ['A', 'y'], - ['M', 'M'], - ['W', 'w'], - ['D', 'd'], - ['H', 'h'], - ['Min', 'm'], - ['S', 's'], - ['ms', 'ms'] -) - -# Output config -out_level = 'NORMAL' -out_timestamp = True - -def clean(df, clean_na = None, how = 'all'): - """ - Helper function for cleaning nan in a pandas.DataFrame - Parameters - ---------- - df: pandas.DataFrame - The dataframe to clean - clean_na: None or string - type of nan cleaning. If not None, can be 'drop' or 'fill' - how: 'string' - Same as how in dropna, fillna. Can be 'any', or 'all' - Returns - ------- - Clean dataframe - """ - - if clean_na is not None: - if clean_na == 'drop': - df.dropna(axis = 0, how = how, inplace = True) - elif clean_na == 'fill': - df = df.fillna(method = 'bfill').fillna(method = 'ffill') - return df - -def convert_rollup_to_freq(rollup): - # Convert frequency from pandas to API's - for index, letter in enumerate(rollup): - try: - aux = int(letter) - except: - index_first = index - letter_first = letter - frequency_value = rollup[:index_first] - rollup_unit = rollup[index_first:] - break - - for item in rollup_2_freq_lut: - if item[1] == rollup_unit: - frequency_unit = item[0] - break - - frequency = frequency_value + frequency_unit - return frequency - -def localise_date(date, timezone, tzaware=True): - """ - Localises a date if it's tzinfo is None, otherwise converts it to it. - If the timestamp is tz-aware, converts it as well - Parameters - ---------- - date: string or datetime - Date - timezone: string - Timezone string. i.e.: 'Europe/Madrid' - Returns - ------- - The date converted to 'UTC' and localised based on the timezone - """ - if date is not None: - # Per default, we consider that timestamps are tz-aware or UTC. - # If not, preprocessing should be done to get there - result_date = to_datetime(date, utc = tzaware) - if result_date.tzinfo is not None: - result_date = result_date.tz_convert(timezone) - else: - result_date = result_date.tz_localize(timezone) - else: - result_date = None - - return result_date - -def std_out(msg: str, - mtype: Optional[str] = None, - force: Optional[bool] = False - ): - - if out_timestamp == True: - stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - else: - stamp = '' - # Output levels: - # 'QUIET': nothing, - # 'NORMAL': warn, err - # 'DEBUG': info, warn, err, success - if force == True: priority = 2 - elif out_level == 'QUIET': priority = 0 - elif out_level == 'NORMAL': priority = 1 - elif out_level == 'DEBUG': priority = 2 - - if mtype is None and priority>1: - print(f'[{stamp}] - ' + '[INFO] ' + msg) - elif mtype == 'SUCCESS' and priority>0: - print(f'[{stamp}] - ' + '[SUCCESS] ' + msg) - elif mtype == 'WARNING' and priority>0: - print(f'[{stamp}] - ' + '[WARNING] ' + msg) - elif mtype == 'ERROR' and priority>0: - print(f'[{stamp}] - ' + '[ERROR] ' + msg) - -# Base URL for all methods -API_URL = 'https://api.smartcitizen.me/v0/' - -class ScApiDevice: - - API_BASE_URL= API_URL + 'devices/' - - def __init__ (self, device_id: int): - - self.id = device_id # the number after https://smartcitizen.me/kits/###### - - # Additional device stuff - self.kit_id = None # the number that defines the type of blueprint - self.mac = None - self.last_reading_at = None - self.added_at = None - self.timezone = None - self.lat = None - self.long = None - self.alt = None - self.data = None - self.sensors = None - self.devicejson = None - self.postprocessing = None - self._url = f'https://smartcitizen.me/kits/{self.id}' - self._api_url = f'{self.API_BASE_URL}{self.id}' - - @property - def url(self) -> str: - return self._url - - @property - def api_url(self) -> str: - return self._api_url - - @staticmethod - def get_kits() -> List[Kit]: - kits = get(API_URL + 'kits/?per_page=200') - - if kits.status_code == 429: - std_out('API reported {}. Retrying once'.format(kits.status_code), - 'WARNING') - return None - - if kits.status_code == 200 or kits.status_code == 201: - result = [Kit(**kit) for kit in kits.json()] - return result - else: - std_out('API reported {}'.format(kits.status_code), 'ERROR') - return None - - @staticmethod - def get_device_info(id:int) -> DeviceSummary: - device = get(API_URL + 'devices/{}/'.format(id)) - - if device.status_code == 429: - std_out('API reported {}. Retrying once'.format(device.status_code), - 'WARNING') - return None - - if device.status_code == 200 or device.status_code == 201: - dj = device.json() - - dj['owner_id'] = int(dj['owner']['id']) - dj['owner_username'] = dj['owner']['username'] - dj['latitude'] = float(dj['data']['location']['latitude']) - dj['longitude'] = float(dj['data']['location']['longitude']) - dj['city'] = dj['data']['location']['city'] - dj['country_code'] = dj['data']['location']['country_code'] - dj['kit_id'] = int(dj['kit']['id']) - - result = DeviceSummary(**dj) - return result - else: - std_out('API reported {}'.format(device.status_code), 'ERROR') - return None - - @staticmethod - def get_devices( - owner_username: Optional[str] = None, - kit_id: Optional[int] = None, - city: Optional[str] = None, - tags: Optional[list] = None, - tag_method: Optional[str] = 'any', - full: Optional[bool] = False, - ) -> List[DeviceSummary]: - """ - Gets devices from Smart Citizen API with certain requirements - Parameters - ---------- - user: string - None - Username - kit_id: integer - None - Kit ID - city: string, optional - Empty string - City - tags: list of strings - None - Tags for the device (system or user). Default system wide are: indoor, outdoor, online, and offline - tag_method: string - 'any' - 'any' or 'all'. Checks if 'all' the tags are to be included in the tags or it could be any - full: bool - False - Returns a list with if False, or the whole dataframe if True - Returns - ------- - A list of kit IDs that comply with the requirements, or the full df, depending on full. - If no requirements are set, returns all of them - """ - - world_map = get(API_URL + 'devices/world_map') - df = DataFrame(world_map.json()) - df = df.dropna(axis=0, how='any') - df['kit_id'] = df['kit_id'].astype(int) - - # Location - if owner_username is not None: df=df[(df['owner_username']==owner_username)] - if kit_id is not None: df=df[(df['kit_id']==kit_id)] - if city is not None: df=df[(df['city']==city)] - - # Tags - if tags is not None: - if tag_method == 'any': - df['has_tags'] = df.apply(lambda x: any(tag in x['system_tags']+x['user_tags'] for tag in tags), axis=1) - elif tag_method == 'all': - df['has_tags'] = df.apply(lambda x: all(tag in x['system_tags']+x['user_tags'] for tag in tags), axis=1) - df=df[(df['has_tags']==True)] - - return [DeviceSummary(**d) for d in df.to_dict(orient='records')] - - @staticmethod - def global_search(value: Optional[str] = None) -> DataFrame: - """ - Gets devices from Smart Citizen API based on basic search query values, - searching both Users and Devices at the same time. - Global search documentation: https://developer.smartcitizen.me/#global-search - Parameters - ---------- - value: string - None - Query to fit - For null, not_null values, use 'null' or 'not_null' - Returns - ------- - A list of kit IDs that comply with the requirements, or the full df, depending on full. - """ - - API_BASE_URL = "https://api.smartcitizen.me/v0/search?q=" - - # Value check - if value is None: std_out(f'Value needs a value, {value} supplied', 'ERROR'); return None - - url = API_BASE_URL + f'{value}' - - df = DataFrame() - isn = True - while isn: - try: - r = get(url) - # If status code OK, retrieve data - if r.status_code == 200 or r.status_code == 201: - h = process_headers(r.headers) - df = df.combine_first(DataFrame(r.json()).set_index('id')) - else: - std_out('API reported {}'.format(r.status_code), 'ERROR') - except: - std_out('Failed request. Probably no connection', 'ERROR') - pass - - if 'next' in h: - if h['next'] == url: isn = False - elif h['next'] != url: url = h['next'] - else: - isn = False - - return df - - @staticmethod - def search_by_query(key: Optional[str] = '', value: Optional[str] = None) -> DataFrame: - """ - Gets devices from Smart Citizen API based on ransack parameters - Basic query documentation: https://developer.smartcitizen.me/#basic-searching - Parameters - ---------- - key: string - '' - Query key according to the basic query documentation. Some (not all) parameters are: - ['id', 'owner_id', 'name', 'description', 'mac_address', 'created_at', - 'updated_at', 'kit_id', 'geohash', 'last_recorded_at', 'uuid', 'state', - 'postprocessing_id', 'hardware_info'] - value: string - None - Query to fit - For null, not_null values, use 'null' or 'not_null' - Returns - ------- - A list of kit IDs that comply with the requirements, or the full df, depending on full. - """ - - API_BASE_URL = "https://api.smartcitizen.me/v0/devices/" - - # Value check - if value is None: std_out(f'Value needs a value, {value} supplied', 'ERROR'); return None - - if value == 'null' or value == 'not_null': - url = API_BASE_URL + f'?q[{key}_{value}]=1' - else: - url = API_BASE_URL + f'?q[{key}]={value}' - - df = DataFrame() - isn = True - while isn: - try: - r = get(url) - # If status code OK, retrieve data - if r.status_code == 200 or r.status_code == 201: - h = process_headers(r.headers) - df = df.combine_first(DataFrame(r.json()).set_index('id')) - else: - std_out('API reported {}'.format(r.status_code), 'ERROR') - except: - std_out('Failed request. Probably no connection', 'ERROR') - pass - - if 'next' in h: - if h['next'] == url: isn = False - elif h['next'] != url: url = h['next'] - else: - isn = False - return df - - def get_mac(self, update: Optional[bool] = None) -> str: - if self.mac is None or update: - std_out(f'Requesting MAC from API for device {self.id}') - # Get device - try: - deviceR = get(self.API_BASE_URL + '{}/'.format(self.id)) - - # If status code OK, retrieve data - if deviceR.status_code == 200 or deviceR.status_code == 201: - if 'hardware_info' in deviceR.json().keys(): self.mac = deviceR.json()['hardware_info']['mac'] - std_out ('Device {} is has this MAC {}'.format(self.id, self.mac)) - else: - std_out('API reported {}'.format(deviceR.status_code), 'ERROR') - except: - std_out('Failed request. Probably no connection', 'ERROR') - pass - - return self.mac - - def get_device_json(self, update: Optional[bool] = None) -> dict: - if self.devicejson is None or update: - try: - deviceR = get(self.API_BASE_URL + '{}/'.format(self.id)) - if deviceR.status_code == 429: - std_out('API reported {}. Retrying once'.format(deviceR.status_code), - 'WARNING') - sleep(30) - deviceR = get(self.API_BASE_URL + '{}/'.format(self.id)) - - if deviceR.status_code == 200 or deviceR.status_code == 201: - self.devicejson = deviceR.json() - else: - std_out('API reported {}'.format(deviceR.status_code), 'ERROR') - except: - std_out('Failed request. Probably no connection', 'ERROR') - pass - return self.devicejson - - def get_device_description(self, update: Optional[bool] = None) -> str: - if self.get_device_json(update) is not None: - return self.get_device_json()['kit']['description'] - return None - - def get_kit_ID(self, update: Optional[bool] = None) -> int: - - if self.kit_id is None or update: - if self.get_device_json(update) is not None: - self.kit_id = self.devicejson['kit']['id'] - - return self.kit_id - - def get_device_last_reading(self, update: Optional[bool] = None) -> datetime: - - if self.last_reading_at is None or update: - if self.get_device_json(update) is not None and self.get_device_json(update)['state'] != 'never_published': - self.last_reading_at = localise_date(self.devicejson['last_reading_at'], 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ') - - std_out ('Device {} has last reading at {}'.format(self.id, self.last_reading_at)) - - return self.last_reading_at - - def get_device_added_at(self, update: Optional[bool] = None) -> datetime: - - if self.added_at is None or update: - if self.get_device_json(update) is not None: - self.added_at = localise_date(self.devicejson['added_at'], 'UTC').strftime('%Y-%m-%dT%H:%M:%SZ') - - std_out ('Device {} was added at {}'.format(self.id, self.added_at)) - - return self.added_at - - def get_device_postprocessing(self, update: Optional[bool] = None) -> dict: - - if self.postprocessing is None or update: - if self.get_device_json(update) is not None: - self.postprocessing = self.devicejson['postprocessing'] - - if self.postprocessing is not None: - # Check the url in hardware - if 'hardware_url' in self.postprocessing: - urls = url_checker(self.postprocessing['hardware_url']) - # If URL is empty, try prepending base url from config - if not urls: - tentative_url = f"{config._base_postprocessing_url}hardware/{self.postprocessing['hardware_url']}.{config._default_file_type}" - else: - if len(urls)>1: std_out('URLs for postprocessing recipe are more than one, trying first', 'WARNING') - tentative_url = urls[0] - - self.postprocessing['hardware_url'] = tentative_url - - std_out ('Device {} has postprocessing information:\n{}'.format(self.id, self.postprocessing)) - else: - std_out (f'Device {self.id} has no postprocessing information') - - return self.postprocessing - - def get_device_timezone(self, update: Optional[bool] = None) -> str: - - if self.timezone is None or update: - latitude, longitude = self.get_device_lat_long(update) - # Localize it - - if latitude is not None and longitude is not None: - self.timezone = tf.timezone_at(lng=longitude, lat=latitude) - - std_out ('Device {} timezone is {}'.format(self.id, self.timezone)) - - return self.timezone - - def get_device_lat_long(self, update: Optional[bool] = None) -> tuple: - - if self.lat is None or self.long is None or update: - if self.get_device_json(update) is not None: - latidude = longitude = None - if 'location' in self.devicejson.keys(): - latitude, longitude = self.devicejson['location']['latitude'], self.devicejson['location']['longitude'] - elif 'data' in self.devicejson.keys(): - if 'location' in self.devicejson['data'].keys(): - latitude, longitude = self.devicejson['data']['location']['latitude'], self.devicejson['data']['location']['longitude'] - - self.lat = latitude - self.long = longitude - - std_out ('Device {} is located at {}, {}'.format(self.id, self.lat, self.long)) - - return (self.lat, self.long) - - def get_device_alt(self, update: Optional[bool] = None) -> float: - - if self.lat is None or self.long is None: - self.get_device_lat_long(update) - - if self.alt is None or update: - self.alt = get_elevation(_lat = self.lat, _long = self.long) - - std_out ('Device {} altitude is {}m'.format(self.id, self.alt)) - - return self.alt - - def get_device_sensors(self, update: Optional[bool] = None) -> dict: - - if self.sensors is None or update: - if self.get_device_json(update) is not None: - # Get available sensors in platform - sensors = self.devicejson['data']['sensors'] - - # Put the ids and the names in lists - self.sensors = dict() - for sensor in sensors: - self.sensors[sensor['id']] = sensor['name'] - - return self.sensors - - def get_device_data(self, - min_date: Optional[datetime] = None, - max_date: Optional[datetime] = None, - rollup: Optional[str] = '1h', - clean_na: Optional[str] = None, - resample: Optional[bool] = False)->DataFrame: - - std_out(f'Requesting data from SC API') - std_out(f'Device ID: {self.id}') - - std_out(f'Using rollup: {rollup}') - - # Make sure we have the everything we need beforehand - self.get_device_sensors() - self.get_device_timezone() - self.get_device_last_reading() - self.get_device_added_at() - self.get_kit_ID() - - if self.timezone is None: - std_out('Device does not have timezone set, skipping', 'WARNING') - return None - - # Check start date and end date - # Converting to UTC by passing None - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_convert.html - if min_date is not None: - min_date = localise_date(to_datetime(min_date), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') - std_out (f'Min Date: {min_date}') - else: - min_date = localise_date(to_datetime('2001-01-01'), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') - std_out(f"No min_date specified") - - if max_date is not None: - max_date = localise_date(to_datetime(max_date), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') - std_out (f'Max Date: {max_date}') - - # Trim based on actual data available - if min_date is not None and self.last_reading_at is not None: - if min_date > self.last_reading_at: - std_out(f'Device request would yield empty data (min_date). Returning', 'WARNING') - return None - - if max_date is not None and self.added_at is not None: - if max_date < self.added_at: - std_out(f'Device request would yield empty data (max_date). Returning', 'WARNING') - return None - - if max_date is not None and self.last_reading_at is not None: - if max_date > self.last_reading_at: - std_out('Trimming max_date to last reading', 'WARNING') - max_date = self.last_reading_at - - # Print stuff - std_out('Kit ID: {}'.format(self.kit_id)) - std_out(f'Device timezone: {self.timezone}') - if not self.sensors.keys(): - std_out(f'Device is empty') - return None - else: std_out(f'Sensor IDs: {list(self.sensors.keys())}') - - df = DataFrame() - std_out(f'Requesting from {min_date} to {max_date}') - - # Get devices in the sensor first - for sensor_id in self.sensors.keys(): - - # Request sensor per ID - request = self.API_BASE_URL + '{}/readings?'.format(self.id) - - if min_date is not None: request += f'from={min_date}' - if max_date is not None: request += f'&to={max_date}' - - request += f'&rollup={rollup}' - request += f'&sensor_id={sensor_id}' - request += '&function=avg' - - # Make request - headers = {'Content-type': 'application/json'} - response = get(request, headers = headers) - - # Retry once in case of 429 after 30s - if response.status_code == 429: - std_out('Too many requests, waiting for 1 more retry', 'WARNING') - sleep (30) - response = get(request, headers = headers) - - flag_error = False - try: - sensorjson = response.json() - except: - std_out(f'Problem with json data from API, {response.status_code}', 'ERROR') - flag_error = True - pass - continue - - if 'readings' not in sensorjson.keys(): - std_out(f'No readings key in request for sensor: {sensor_id} ({self.sensors[sensor_id]})', 'ERROR') - flag_error = True - continue - - elif sensorjson['readings'] == []: - std_out(f'No data in request for sensor: {sensor_id} ({self.sensors[sensor_id]})', 'WARNING') - flag_error = True - continue - - if flag_error: continue - - try: - dfsensor = DataFrame(sensorjson['readings']).set_index(0) - dfsensor.columns = [self.sensors[sensor_id]] - # dfsensor.index = to_datetime(dfsensor.index).tz_localize('UTC').tz_convert(self.timezone) - dfsensor.index = localise_date(dfsensor.index, self.timezone) - dfsensor.sort_index(inplace=True) - dfsensor = dfsensor[~dfsensor.index.duplicated(keep='first')] - - # Drop unnecessary columns - dfsensor.drop([i for i in dfsensor.columns if 'Unnamed' in i], axis=1, inplace=True) - # Check for weird things in the data - dfsensor = dfsensor.astype(float, errors='ignore') - # dfsensor = dfsensor.apply(to_numeric, errors='coerce') - # Resample - if (resample): - dfsensor = dfsensor.resample(convert_rollup_to_freq(rollup)).mean() - df = df.combine_first(dfsensor) - except: - print_exc() - std_out('Problem with sensor data from API', 'ERROR') - flag_error = True - pass - continue - - try: - df = df.reindex(df.index.rename('TIME')) - df = clean(df, clean_na, how = 'all') - self.data = df - - except: - std_out('Problem closing up the API dataframe', 'ERROR') - pass - return None - - if flag_error == False: std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS') - return self.data - - -# def get_device_data(self, min_date = None, max_date = None, frequency = '1Min', clean_na = None, resample = True): - -# if 'SC_ADMIN_BEARER' in environ: -# std_out('Admin Bearer found, using it', 'SUCCESS') - -# headers = {'Authorization':'Bearer ' + environ['SC_ADMIN_BEARER']} -# else: -# headers = None -# std_out('Admin Bearer not found', 'WARNING') - -# std_out(f'Requesting data from SC API') -# std_out(f'Device ID: {self.id}') - -# rollup = self.convert_rollup(frequency) -# std_out(f'Using rollup: {rollup}') - -# # Make sure we have the everything we need beforehand -# self.get_device_sensors() -# self.get_device_timezone() -# self.get_device_last_reading() -# self.get_device_added_at() -# self.get_kit_ID() - -# if self.timezone is None: -# std_out('Device does not have timezone set, skipping', 'WARNING') -# return None - -# # Check start date and end date -# # Converting to UTC by passing None -# # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_convert.html -# if min_date is not None: -# min_date = localise_date(to_datetime(min_date), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') -# std_out (f'Min Date: {min_date}') -# else: -# min_date = localise_date(to_datetime('2001-01-01'), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') -# std_out(f"No min_date specified") - -# if max_date is not None: -# max_date = localise_date(to_datetime(max_date), 'UTC').strftime('%Y-%m-%dT%H:%M:%S') -# std_out (f'Max Date: {max_date}') - -# # Trim based on actual data available -# if min_date is not None and self.last_reading_at is not None: -# if min_date > self.last_reading_at: -# std_out(f'Device request would yield empty data (min_date). Returning', 'WARNING') -# return None - -# if max_date is not None and self.added_at is not None: -# if max_date < self.added_at: -# std_out(f'Device request would yield empty data (max_date). Returning', 'WARNING') -# return None - -# if max_date is not None and self.last_reading_at is not None: -# if max_date > self.last_reading_at: -# std_out('Trimming max_date to last reading', 'WARNING') -# max_date = self.last_reading_at - -# # Print stuff -# std_out('Kit ID: {}'.format(self.kit_id)) -# std_out(f'Device timezone: {self.timezone}') -# if not self.sensors.keys(): -# std_out(f'Device is empty') -# return None -# else: std_out(f'Sensor IDs: {list(self.sensors.keys())}') - -# df = DataFrame() -# std_out(f'Requesting from {min_date} to {max_date}') - -# # Get devices in the sensor first -# for sensor_id in self.sensors.keys(): - -# # Request sensor per ID -# request = self.API_BASE_URL + '{}/readings?'.format(self.id) - -# if min_date is not None: request += f'from={min_date}' -# if max_date is not None: request += f'&to={max_date}' - -# request += f'&rollup={rollup}' -# request += f'&sensor_id={sensor_id}' -# request += '&function=avg' - -# # Make request -# response = get(request, headers = headers) - -# # Retry once in case of 429 after 30s -# if response.status_code == 429: -# std_out('Too many requests, waiting for 1 more retry', 'WARNING') -# sleep (30) -# response = get(request, headers = headers) - -# flag_error = False -# try: -# sensorjson = response.json() -# except: -# std_out(f'Problem with json data from API, {response.status_code}', 'ERROR') -# flag_error = True -# pass -# continue - -# if 'readings' not in sensorjson.keys(): -# std_out(f'No readings key in request for sensor: {sensor_id} ({self.sensors[sensor_id]})', 'ERROR') -# flag_error = True -# continue - -# elif sensorjson['readings'] == []: -# std_out(f'No data in request for sensor: {sensor_id} ({self.sensors[sensor_id]})', 'WARNING') -# flag_error = True -# continue - -# if flag_error: continue - -# try: -# dfsensor = DataFrame(sensorjson['readings']).set_index(0) -# dfsensor.columns = [self.sensors[sensor_id]] -# # dfsensor.index = to_datetime(dfsensor.index).tz_localize('UTC').tz_convert(self.timezone) -# dfsensor.index = localise_date(dfsensor.index, self.timezone) -# dfsensor.sort_index(inplace=True) -# dfsensor = dfsensor[~dfsensor.index.duplicated(keep='first')] - -# # Drop unnecessary columns -# dfsensor.drop([i for i in dfsensor.columns if 'Unnamed' in i], axis=1, inplace=True) -# # Check for weird things in the data -# dfsensor = dfsensor.astype(float, errors='ignore') -# # dfsensor = dfsensor.apply(to_numeric, errors='coerce') -# # Resample -# if (resample): -# dfsensor = dfsensor.resample(frequency).mean() -# df = df.combine_first(dfsensor) -# except: -# print_exc() -# std_out('Problem with sensor data from API', 'ERROR') -# flag_error = True -# pass -# continue - -# try: -# df = df.reindex(df.index.rename('TIME')) -# df = clean(df, clean_na, how = 'all') -# self.data = df - -# except: -# std_out('Problem closing up the API dataframe', 'ERROR') -# pass -# return None - -# if flag_error == False: std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS') -# return self.data diff --git a/tests/connector/test_connector.py b/tests/connector/test_connector.py new file mode 100644 index 0000000..cbfdc28 --- /dev/null +++ b/tests/connector/test_connector.py @@ -0,0 +1,32 @@ +import pytest +from smartcitizen_connector import SCDevice +from smartcitizen_connector.utils import localise_date +import asyncio + +def test_connector(): + id = 16549 + rollup = '1m' + resample = False + uuid = "d030cb8a-2c2a-429e-9f04-416888708193" + min_date = '2023-07-29T09:00:06Z' + max_date = None + + d = SCDevice(id) + asyncio.run(d.get_data( + min_date = localise_date(min_date, d.timezone), + max_date = localise_date(max_date, d.timezone), + rollup = rollup, + clean_na = None, + resample = resample) + ) + + d0 = d.data.index[0].tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ') + print (d0) + print (d.id) + print (d.url) + print (d.json) + + assert d.json.id == 16549, resp.text + assert d.data is not None, resp.text + assert d.json.uuid == uuid, resp.text + assert d0 == min_date, resp.text \ No newline at end of file