Skip to content

Commit

Permalink
[FSTORE-1207] AirQuality LLM project (#250)
Browse files Browse the repository at this point in the history
* Function Calling & AirQuality FunctionCalling Chatbot
  • Loading branch information
Maxxx-zh authored Mar 18, 2024
1 parent 3b4eb4e commit c0d6818
Show file tree
Hide file tree
Showing 18 changed files with 4,125 additions and 1,123 deletions.
976 changes: 732 additions & 244 deletions advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb

Large diffs are not rendered by default.

942 changes: 848 additions & 94 deletions advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb

Large diffs are not rendered by default.

632 changes: 474 additions & 158 deletions advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb

Large diffs are not rendered by default.

402 changes: 335 additions & 67 deletions advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb

Large diffs are not rendered by default.

776 changes: 776 additions & 0 deletions advanced_tutorials/air_quality/5_function_calling.ipynb

Large diffs are not rendered by default.

104 changes: 104 additions & 0 deletions advanced_tutorials/air_quality/app_gradio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import gradio as gr
from transformers import pipeline
import numpy as np
import hopsworks
import joblib
from functions.llm_chain import load_model, get_llm_chain, generate_response

# Initialize the ASR pipeline
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def connect_to_hopsworks():
# Initialize Hopsworks feature store connection
project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve the model registry
mr = project.get_model_registry()

# Retrieve the 'air_quality_fv' feature view
feature_view = fs.get_feature_view(
name="air_quality_fv",
version=1,
)

# Initialize batch scoring
feature_view.init_batch_scoring(1)

# Retrieve the 'air_quality_xgboost_model' from the model registry
retrieved_model = mr.get_model(
name="air_quality_xgboost_model",
version=1,
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()

# Load the XGBoost regressor model and label encoder from the saved model directory
model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")

return feature_view, model_air_quality, encoder


def retrieve_llm_chain():

# Load the LLM and its corresponding tokenizer.
model_llm, tokenizer = load_model()

# Create and configure a language model chain.
llm_chain = get_llm_chain(
model_llm,
tokenizer,
)

return model_llm, tokenizer, llm_chain


# Retrieve the feature view, air quality model and encoder for the city_name column
feature_view, model_air_quality, encoder = connect_to_hopsworks()

# Load the LLM and its corresponding tokenizer and configure a language model chain
model_llm, tokenizer, llm_chain = retrieve_llm_chain()

def transcribe(audio):
sr, y = audio
y = y.astype(np.float32)
if y.ndim > 1 and y.shape[1] > 1:
y = np.mean(y, axis=1)
y /= np.max(np.abs(y))
return transcriber({"sampling_rate": sr, "raw": y})["text"]

def generate_query_response(user_query):
response = generate_response(
user_query,
feature_view,
model_llm,
tokenizer,
model_air_quality,
encoder,
llm_chain,
verbose=False,
)
return response

def handle_input(text_input=None, audio_input=None):
if audio_input is not None:
user_query = transcribe(audio_input)
else:
user_query = text_input

if user_query:
return generate_query_response(user_query)
else:
return "Please provide input either via text or voice."

iface = gr.Interface(
fn=handle_input,
inputs=[gr.Textbox(placeholder="Type here or use voice input..."), gr.Audio()],
outputs="text",
title="🌤️ AirQuality AI Assistant 💬",
description="Ask your questions about air quality or use your voice to interact."
)

iface.launch(share=True)
99 changes: 99 additions & 0 deletions advanced_tutorials/air_quality/app_streamlit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import streamlit as st
import hopsworks
import joblib
from functions.llm_chain import load_model, get_llm_chain, generate_response
import warnings
warnings.filterwarnings('ignore')

st.title("🌤️ AirQuality AI assistant 💬")

@st.cache_resource()
def connect_to_hopsworks():
# Initialize Hopsworks feature store connection
project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve the model registry
mr = project.get_model_registry()

# Retrieve the 'air_quality_fv' feature view
feature_view = fs.get_feature_view(
name="air_quality_fv",
version=1,
)

# Initialize batch scoring
feature_view.init_batch_scoring(1)

# Retrieve the 'air_quality_xgboost_model' from the model registry
retrieved_model = mr.get_model(
name="air_quality_xgboost_model",
version=1,
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()

# Load the XGBoost regressor model and label encoder from the saved model directory
model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")

return feature_view, model_air_quality, encoder


@st.cache_resource()
def retrieve_llm_chain():

# Load the LLM and its corresponding tokenizer.
model_llm, tokenizer = load_model()

# Create and configure a language model chain.
llm_chain = get_llm_chain(
model_llm,
tokenizer,
)

return model_llm, tokenizer, llm_chain


# Retrieve the feature view, air quality model and encoder for the city_name column
feature_view, model_air_quality, encoder = connect_to_hopsworks()

# Load the LLM and its corresponding tokenizer and configure a language model chain
model_llm, tokenizer, llm_chain = retrieve_llm_chain()

# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])

# React to user input
if user_query := st.chat_input("How can I help you?"):
# Display user message in chat message container
st.chat_message("user").markdown(user_query)
# Add user message to chat history
st.session_state.messages.append({"role": "user", "content": user_query})

st.write('⚙️ Generating Response...')

# Generate a response to the user query
response = generate_response(
user_query,
feature_view,
model_llm,
tokenizer,
model_air_quality,
encoder,
llm_chain,
verbose=False,
)

# Display assistant response in chat message container
with st.chat_message("assistant"):
st.markdown(response)
# Add assistant response to chat history
st.session_state.messages.append({"role": "assistant", "content": response})
158 changes: 0 additions & 158 deletions advanced_tutorials/air_quality/feature_pipeline.py

This file was deleted.

Empty file.
6 changes: 3 additions & 3 deletions advanced_tutorials/air_quality/features/air_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def shift_pm_2_5(df: pd.DataFrame, days: int = 5) -> pd.DataFrame:
"""
for shift_value in range(1, days + 1):
df[f'pm_2_5_previous_{shift_value}_day'] = df.groupby('city_name')['pm2_5'].shift(shift_value)
df = df.dropna()
return df


Expand Down Expand Up @@ -227,8 +226,9 @@ def feature_engineer_aq(df: pd.DataFrame) -> pd.DataFrame:
for i in [7, 14, 28]:
for func in [moving_std, exponential_moving_average, exponential_moving_std]:
df_res = func(df_res, i)

df_res = df_res.sort_values(by=["date", "pm2_5"]).dropna()


df_res = df_res.sort_values(by=["date", "pm2_5"])
df_res = df_res.reset_index(drop=True)

df_res['year'] = year(df_res['date'])
Expand Down
Loading

0 comments on commit c0d6818

Please sign in to comment.