Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add convenience funtion alter_db_table_column to add/drop databricks table columns #139

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@ History
=======


v0.21.1 (2024-10-10)

* Add convenience funtion alter_db_table_column to add/drop databricks table columns.
* No longer require installing haversine.
* Update fakeredis==2.25.1.
* Update mlflow==2.16.2.
* Update pandas==2.2.3.
* Update polars==1.9.0.
* Update pylint==3.3.1.
* Update pysmb==1.2.10.
* Update redis==5.1.1.


v0.21.0 (2024-09-17)

* Update cython==3.0.11.
Expand Down
1 change: 1 addition & 0 deletions aioradio/aws/dynamodb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Generic async AWS functions for DynamoDB."""

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

from typing import Any, Dict, List

Expand Down
1 change: 1 addition & 0 deletions aioradio/aws/sqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=dangerous-default-value
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

import logging
from typing import Any, Dict, List
Expand Down
21 changes: 13 additions & 8 deletions aioradio/ds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# pylint: disable=protected-access
# pylint: disable=too-many-arguments
# pylint: disable=too-many-boolean-expressions
# pylint: disable=too-many-positional-arguments
# pylint: disable=unnecessary-comprehension
# pylint: disable=unused-argument
# pylint: disable=unused-import
Expand All @@ -32,7 +33,6 @@
import polars as pl
import pyarrow as pa
import pyarrow.dataset as ds
from haversine import haversine, Unit
from mlflow.entities.model_registry.model_version_status import ModelVersionStatus
from mlflow.tracking.client import MlflowClient
from pyspark.sql import SparkSession
Expand All @@ -56,6 +56,17 @@
############################### Databricks functions ################################


def alter_db_table_column(table: str, column: str, cmd: str, dtype: str=''):
"""Convenience function to either add or drop a single column in a
databricks table."""

cmd = cmd.upper()
if cmd == 'ADD':
spark.sql(f'ALTER TABLE {table} ADD COLUMN ({column} {dtype})')
elif cmd == 'DROP':
spark.sql(f'ALTER TABLE {table} DROP COLUMN IF EXISTS ({column})')


def db_catalog(env):
"""Return the DataBricks catalog based on the passed in environment."""

Expand Down Expand Up @@ -237,7 +248,7 @@ def promote_model_to_production(model_name, tags):
logger.info(f"Model status: {ModelVersionStatus.to_string(status)}")
if status == ModelVersionStatus.READY:
break
time.sleep(1)
sleep(1)

registered_model = client.get_registered_model(model_name)
logger.info(f"registered_model: {registered_model}")
Expand Down Expand Up @@ -479,12 +490,6 @@ def apply_bearing(dataframe, latitude, longitude):
return dataframe.apply(lambda x: bearing(x.LATITUDE, latitude, x.LONGITUDE, longitude), axis=1)


def apply_haversine(dataframe, latitude, longitude):
"""Apply haversine function on split dataframe."""

return dataframe.apply(lambda x: haversine((x.LATITUDE, x.LONGITUDE), (latitude, longitude), unit=Unit.MILES), axis=1)


def logit(x, a, b, c, d):
"""Logit function."""

Expand Down
1 change: 1 addition & 0 deletions aioradio/file_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# pylint: disable=too-many-lines
# pylint: disable=too-many-locals
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-positional-arguments
# pylint: disable=too-many-public-methods

import asyncio
Expand Down
1 change: 1 addition & 0 deletions aioradio/psycopg2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=c-extension-no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments

import psycopg2

Expand Down
1 change: 1 addition & 0 deletions aioradio/pyodbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# pylint: disable=c-extension-no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
# pylint: disable=unsubscriptable-object

import os
Expand Down
1 change: 1 addition & 0 deletions aioradio/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=no-member
# pylint: disable=too-many-arguments
# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-positional-arguments
# pylint: disable=unsubscriptable-object

import hashlib
Expand Down
16 changes: 8 additions & 8 deletions aioradio/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,36 @@ botocore==1.34.131
cython==3.0.11
databricks-connect==14.3.1
ddtrace==2.6.5
fakeredis==2.24.1
fakeredis==2.25.1
faust-cchardet==2.1.19
flask==3.0.3
flask-cors==4.0.1
grpcio==1.62.2
grpcio-status==1.62.2
haversine==2.8.1
httpx==0.27.2
importlib-metadata==8.4.0
mandrill==1.0.60
mlflow==2.14.3
mlflow==2.16.2
moto==4.2.14
numpy==1.26.4
openpyxl==3.0.10
orjson==3.9.15
pandas==2.2.2
pandas==2.2.3
pkginfo==1.10.0
polars==1.7.1
polars==1.9.0
pre-commit==3.8.0
protobuf==4.25.4
psycopg2-binary==2.9.9
pyarrow==15.0.2
pylint==3.2.7
pylint==3.3.1
pyodbc==5.1.0 --no-binary=pyodbc
pysmb==1.2.9.1
pysmb==1.2.10
pyspark==3.4.3
pytest==8.1.2
pytest-asyncio==0.21.1
pytest-cov==5.0.0
python-json-logger==2.0.7
redis==5.0.8
redis==5.1.1
twine==5.1.1
typing_extensions==4.11.0
werkzeug==3.0.4
Expand Down
5 changes: 2 additions & 3 deletions aioradio/tests/file_ingestion_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,10 @@ async def func():


@pytest.mark.asyncio
async def test_async_db_wrapper(user):
async def test_async_db_wrapper():
"""Test async_db_wrapper with database connections."""

if user != 'tim.reichard':
pytest.skip('Skip test_async_db_wrapper since user is not Tim Reichard')
pytest.skip('Skip test_async_db_wrapper since we no longer have access to aws secrets locally.')

db_info=[{
'db': 'pyodbc',
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
long_description = fileobj.read()

setup(name='aioradio',
version='0.21.0',
version='0.21.1',
description='Generic asynchronous i/o python utilities for AWS services (SQS, S3, DynamoDB, Secrets Manager), Redis, MSSQL (pyodbc), JIRA and more',
long_description=long_description,
long_description_content_type="text/markdown",
Expand All @@ -31,7 +31,6 @@
'fakeredis>=2.20.0',
'grpcio==1.62.2',
'grpcio-status==1.62.2',
'haversine>=2.8.0',
'httpx>=0.23.0',
'mandrill>=1.0.60',
'mlflow>=2.10.2',
Expand Down
Loading