
Commit

Add functionalities to pipeline
Add auto-correction for execution errors.
Include classification for complex and simple tasks.
Provide an exit option in the API selection step.
Add GPT as a side option for cases where direct code generation is inconvenient, allowing code to be generated by GPT in each single turn.
DoraDong-2023 committed May 22, 2024
1 parent ee9f94f commit aeea5f0
Showing 7 changed files with 286 additions and 64 deletions.
2 changes: 2 additions & 0 deletions chatbot_ui_biomania/components/Chat/LibCardSelect.tsx
@@ -20,6 +20,7 @@ export const libImages: { [key: string]: string } = {
'snapatac2': '/apps/snapatac2.webp',
'anndata': '/apps/anndata.webp',
//'custom': '/apps/customize.webp',
'GPT': '/apps/GPT.webp',
};

export const LibCardSelect = () => {
@@ -57,6 +58,7 @@ export const LibCardSelect = () => {
{ id: 'snapatac2',name: 'snapatac2' },
{ id: 'anndata', name: 'anndata' },
//{ id: 'custom', name: 'custom' },
{ id: 'GPT', name: 'GPT' },
];

const existingLibIds = methods.map(lib => lib.id);
Binary file modified chatbot_ui_biomania/public/.DS_Store
Binary file not shown.
Binary file added chatbot_ui_biomania/public/apps/GPT.webp
Binary file not shown.
210 changes: 146 additions & 64 deletions src/deploy/model.py

Large diffs are not rendered by default.
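Since the model.py diff is collapsed, the auto-correction behaviour mentioned in the commit message is not visible here. A minimal sketch of that general pattern, with purely hypothetical function names (not the repository's actual API), might look like:

# Illustrative auto-correction loop; generate_code and execute_code are hypothetical callables.
import traceback

def run_with_auto_correction(task, generate_code, execute_code, max_retries=3):
    error_feedback = None
    for attempt in range(max_retries):
        code = generate_code(task, error_feedback)    # regenerate, optionally conditioned on the last error
        try:
            return execute_code(code)                 # success: return the execution result
        except Exception:
            error_feedback = traceback.format_exc()   # feed the traceback into the next attempt
    raise RuntimeError(f"Execution still failed after {max_retries} attempts")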

10 changes: 10 additions & 0 deletions src/inference/utils.py
@@ -4,6 +4,16 @@
from ..gpt.utils import get_all_api_json, find_similar_api_pairs, is_pair_in_merged_pairs, find_similar_two_pairs, get_ambiguous_pairs, save_json, load_json
import numpy as np
from PIL import Image
from collections import Counter

def find_differences(annotate_data, api_data):
diff_data = [item for item in annotate_data if item['query_id'] >= len(api_data)]
api_names = [item['api_calling'][0].split('(')[0] for item in diff_data]
api_counts = Counter(api_names)
print(f"Total differences found: {len(diff_data)}")
print(f"Unique API functions: {len(api_counts)}")
assert max(api_counts.values()) == min(api_counts.values()), "API function distribution is not even"
return diff_data

def json_to_docstring(api_name, description, parameters):
params_list = ', '.join([
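For context, a toy call to the new find_differences helper above; the entries are made up and only illustrate the expected data shape:

# Illustrative use of find_differences (toy data, not from the repository's datasets).
api_data = [{"query_id": 0}, {"query_id": 1}]          # previously annotated queries
annotate_data = api_data + [
    {"query_id": 2, "api_calling": ["scanpy.pp.log1p(adata)"]},
    {"query_id": 3, "api_calling": ["scanpy.pp.scale(adata)"]},
]
# Keeps only the newly appended entries (query_id >= len(api_data)) and checks
# that each API name occurs the same number of times among them.
new_items = find_differences(annotate_data, api_data)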
1 change: 1 addition & 0 deletions src/models/__init__.py
@@ -5,3 +5,4 @@
#from . import train_classification
from . import train_retriever
#from . import lit_llama
from . import dialog_classifier
127 changes: 127 additions & 0 deletions src/models/dialog_classifier.py
@@ -0,0 +1,127 @@
"""
Author: Zhengyuan Dong
Date Created: May 06, 2024
Last Modified: May 21, 2024
Description: Compare the tutorial-summary query and single-query retrieval results
"""

import numpy as np
from scipy.stats import norm
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

import os
from sentence_transformers import util
from tqdm import tqdm

from src.gpt.utils import load_json, save_json

class Dialog_Gaussian_classificaiton:
def __init__(self, threshold=0.05):
self.threshold = threshold

def fit_gaussian(self, data):
self.mean = np.mean(data)
self.std = np.std(data)
return self.mean, self.std

def calculate_p_values(self, scores, mean, std):
return [norm.cdf(score, mean, std) for score in scores]

def classify_based_on_p(self, p_values, threshold=0.05):
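# label 1 = p-value below the threshold, i.e. the rank-1 score lies in the left tail of the fitted training distribution; label 0 otherwise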
return [1 if p < threshold else 0 for p in p_values]

def classify(self, rank_1_scores):
p_values_val = self.calculate_p_values(rank_1_scores, self.mean, self.std)
predictions_val = self.classify_based_on_p(p_values_val, threshold=self.threshold)
return predictions_val

def compute_acc(self, labels, predictions):
return accuracy_score(labels, predictions)

def plot_boxplot(self, data, title, LIB):
plt.figure(figsize=(10, 6))
sns.boxplot(data=data)
plt.title(title)
plt.xticks(ticks=range(5), labels=[f'Rank {i+1}' for i in range(5)])
plt.ylabel('Score')
plt.savefig(f'./plot/{LIB}/avg_retriever_{title}.pdf')

def compute_accuracy_filter_compositeAPI(self, LIB, retriever, data, retrieved_api_nums, name='train', LIB_ALIAS='scanpy', verbose=False, filter_composite=True):
# Remove class-type APIs and composite APIs from the data
API_composite = load_json(os.path.join(f"data/standard_process/{LIB}","API_composite.json"))
data_to_save = []
scores_rank_1 = []
scores_rank_2 = []
scores_rank_3 = []
scores_rank_4 = []
scores_rank_5 = []
outliers = []
total_api_non_composite = 0
total_api_non_ambiguous = 0
query_to_api = {}
query_to_retrieved_api = {}
query_to_all_scores = {}
for query_data in tqdm(data):
retrieved_apis = retriever.retrieving(query_data['query'], top_k=retrieved_api_nums+20)
if filter_composite:
retrieved_apis = [i for i in retrieved_apis if i.startswith(LIB_ALIAS) and API_composite[i]['api_type']!='class' and API_composite[i]['api_type']!='unknown']
retrieved_apis = retrieved_apis[:retrieved_api_nums]
assert len(retrieved_apis)==retrieved_api_nums
query_to_retrieved_api[query_data['query']] = retrieved_apis
try:
query_to_api[query_data['query']] = query_data['api_calling'][0].split('(')[0]
except:
pass
query_embedding = retriever.embedder.encode(query_data['query'], convert_to_tensor=True)
hits = util.semantic_search(query_embedding, retriever.corpus_embeddings, top_k=5, score_function=util.cos_sim)
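# hits[0] holds this query's top corpus matches (at most 5 here), sorted by cosine similarity, highest first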
if len(hits[0]) > 0:
scores_rank_1.append(hits[0][0]['score'])
if len(hits[0]) > 1:
scores_rank_2.append(hits[0][1]['score'])
if len(hits[0]) > 2:
scores_rank_3.append(hits[0][2]['score'])
if len(hits[0]) > 3:
scores_rank_4.append(hits[0][3]['score'])
if len(hits[0]) > 4:
scores_rank_5.append(hits[0][4]['score'])
scores = [hit['score'] for hit in hits[0]] if hits[0] else []
query_to_all_scores[query_data['query']] = scores
# Compute average scores for each rank
scores = {
"rank_1": scores_rank_1,
"rank_2": scores_rank_2,
"rank_3": scores_rank_3,
"rank_4": scores_rank_4,
"rank_5": scores_rank_5
}
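# Flag queries whose rank-1 score falls outside 1.5 * IQR of all collected rank-1 scores as outliers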
q1, q3 = np.percentile(scores_rank_1, [25, 75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
for i, score in enumerate(scores_rank_1):
if score < lower_bound or score > upper_bound:
try:
outliers.append({'index': i, 'score': score, 'query': data[i]['query'], 'retrieved_apis': query_to_retrieved_api[data[i]['query']], 'query_api': query_to_api[data[i]['query']], 'all_scores': query_to_all_scores[data[i]['query']]})
if verbose:
print(f"{name} Outlier detected: Score = {score}, Query = {data[i]['query']}, retrieved_apis = {query_to_retrieved_api[data[i]['query']]}, query_api = {query_to_api[data[i]['query']]}, score = {query_to_all_scores[data[i]['query']]}")
except:
pass
return scores, outliers
def single_prediction(self, query, retriever, top_k):
query_embedding = retriever.embedder.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_embedding, retriever.corpus_embeddings, top_k=top_k, score_function=util.cos_sim)
pred_class = 'single'  # default when no hits are returned
if len(hits[0]) > 0:
score_rank1 = hits[0][0]['score']
# TODO: need to load the threshold for the score_rank1 to distinguish whether it is a dialog
pred_label = self.classify([score_rank1])[0]  # classify returns a list; take the single label
if pred_label == 1:
pred_class = 'multiple'
return pred_class

import inspect
__all__ = list(set([name for name, obj in locals().items() if not name.startswith('_') and (inspect.isfunction(obj) or (inspect.isclass(obj) and name != '__init__') or (inspect.ismethod(obj) and not name.startswith('_')))]))
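A brief usage sketch for the classifier defined above; the scores are made-up values, and in the pipeline they would come from compute_accuracy_filter_compositeAPI:

# Illustrative use of Dialog_Gaussian_classificaiton (made-up scores, not repository data).
import numpy as np

clf = Dialog_Gaussian_classificaiton(threshold=0.05)

# Rank-1 retrieval scores collected on training queries, e.g. scores["rank_1"]
# returned by compute_accuracy_filter_compositeAPI.
train_rank1 = np.array([0.82, 0.79, 0.85, 0.77, 0.81])
clf.fit_gaussian(train_rank1)

# A score in the left tail of the fitted Gaussian (p-value < 0.05) is labeled 1,
# which single_prediction maps to the 'multiple' (complex-task) class.
print(clf.classify([0.80, 0.42]))  # -> [0, 1]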
