Skip to content

Commit

Permalink
Merge pull request #11 from GSK-Biostatistics/feature/hive-loader
Browse files Browse the repository at this point in the history
Feature/hive loader
  • Loading branch information
paltusplintus authored Dec 8, 2022
2 parents a07f607 + 6fa2faf commit d71ea89
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 1 deletion.
1 change: 1 addition & 0 deletions data_loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from data_loaders.file_data_loader import FileDataLoader
from data_loaders.sql_server_data_loader import SQLServerDataLoader
from data_loaders.azure_data_loader import AzureDataLoader
from data_loaders.hive_data_loader import HiveDataLoader
43 changes: 43 additions & 0 deletions data_loaders/hive_data_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import neointerface
import os
import jaydebeapi
import pandas as pd


class HiveDataLoader:
    """Run queries against a Hive server over JDBC and return pandas DataFrames.

    Any connection setting that is not passed explicitly is read from the
    environment when the instance is created:

    ========================  =========================
    argument                  environment variable
    ========================  =========================
    ``db_host``               ``HIVE_SERVER``
    ``schema``                ``HIVE_SCHEMA``
    ``user``                  ``HIVE_USER_ID``
    ``password``              ``HIVE_PASSWORD``
    ``driver_path``           ``HIVE_JDBC_DRIVER_PATH``
    ``driver_class``          ``HIVE_JDBC_DRIVER``
    ========================  =========================

    The instance can be used as a context manager; the connection is opened
    on entry and closed on exit.
    """

    def __init__(self,
                 db_host=None,
                 schema=None,
                 user=None,
                 password=None,
                 driver_path=None,
                 driver_class=None,
                 *args, **kwargs):
        """Store the connection settings; no connection is opened here.

        Extra positional and keyword arguments are accepted and ignored.
        """
        # Environment lookups are done here rather than in the signature:
        # default values in a signature are evaluated only once, when the
        # module is imported, so variables set afterwards would be missed.
        self._connection = None
        self._schema = self._setting(schema, "HIVE_SCHEMA")
        self._driver_class = self._setting(driver_class, "HIVE_JDBC_DRIVER")
        self._db_host = self._setting(db_host, "HIVE_SERVER")
        self._user = self._setting(user, "HIVE_USER_ID")
        self._password = self._setting(password, "HIVE_PASSWORD")
        self._driver_path = self._setting(driver_path, "HIVE_JDBC_DRIVER_PATH")

    @staticmethod
    def _setting(value, env_name):
        """Return ``value`` if given, otherwise the environment variable ``env_name``."""
        return value if value is not None else os.environ.get(env_name)

    def __enter__(self):
        """Open the connection and return the loader itself."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions from the ``with`` body propagate."""
        self.close()

    def open(self):
        """Connect to ``jdbc:hive2://<db_host>/<schema>`` with the stored credentials."""
        self._connection = jaydebeapi.connect(
            self._driver_class,
            f'jdbc:hive2://{self._db_host}/{self._schema}',
            {'UID': self._user, 'PWD': self._password, 'AuthMech': "3"},
            self._driver_path,
        )

    def query(self, statement: str) -> "pd.DataFrame":
        """Execute ``statement`` and return all rows as a DataFrame.

        Column labels are taken from the first item of each entry in the
        cursor's ``description``. An empty result set yields an empty
        DataFrame that still carries those column labels.

        :param statement: the statement text passed to the cursor unchanged
        :return: DataFrame with one row per fetched record
        """
        cursor = self._connection.cursor()
        try:
            cursor.execute(statement)
            results = cursor.fetchall()
            names = [item[0] for item in cursor.description]
        finally:
            # Release the cursor even when execute/fetch raises.
            cursor.close()
        # Passing the labels to the constructor (instead of assigning
        # df.columns afterwards) keeps this working when `results` is empty.
        return pd.DataFrame(results, columns=names)

    def get_tables(self) -> "pd.DataFrame":
        """Return the result of ``show tables from <schema>`` as a DataFrame."""
        return self.query(f'show tables from {self._schema}')

    def close(self):
        """Close the connection if one is open; safe to call more than once."""
        if self._connection is not None:
            try:
                self._connection.close()
            finally:
                self._connection = None
1 change: 1 addition & 0 deletions requirements-local.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ azure-identity==1.6.0
datacompy==0.7.2
colorlog==6.6.0
PyGithub==1.55
jaydebeapi==1.2.3
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ azure-identity==1.6.0
datacompy==0.7.2
colorlog==6.6.0
PyGithub==1.55
jaydebeapi==1.2.3
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def read_text(file_name: str):

setuptools.setup(
name="tab2neo", # This is the name of the package
version="1.0.1.0",
version="1.1.0.0", # Release.Major Feature.Minor Feature.Bug Fix
author="Alexey Kuznetsov", # Full name of the author
description="Clinical Linked Data: High-level Python classes to load, model and reshape tabular data imported into Neo4j database",
long_description=long_description, # Long description read from the readme file
Expand Down

0 comments on commit d71ea89

Please sign in to comment.