Skip to content

Commit

Permalink
debug multimodal cache
Browse files Browse the repository at this point in the history
  • Loading branch information
peng3307165 committed May 15, 2024
1 parent a08a739 commit e6cfcb8
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 109 deletions.
49 changes: 0 additions & 49 deletions modelcache/embedding/clip_demo.py

This file was deleted.

19 changes: 10 additions & 9 deletions modelcache_mm/adapter/adapter_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ def adapt_insert(*args, **kwargs):
raise ValueError("Both pre_embedding_image_url and pre_embedding_image_raw cannot be non-empty at the same time.")

if pre_embedding_image_url:
url_start_time = time.time()
response = requests.get(pre_embedding_image_url)
image_data = response.content
pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
print('get_image_time: {}'.format(get_image_time))
# url_start_time = time.time()
# response = requests.get(pre_embedding_image_url)
# image_data = response.content
# pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
# get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
# print('get_image_time: {}'.format(get_image_time))
pre_embedding_image = pre_embedding_image_url
elif pre_embedding_image_raw:
pre_embedding_image = pre_embedding_image_raw
else:
Expand All @@ -70,9 +71,9 @@ def adapt_insert(*args, **kwargs):
print('text_embeddings: {}'.format(text_embeddings))

if len(image_embeddings) > 0 and len(image_embeddings) > 0:
image_embedding = np.array(image_embeddings[0])
text_embedding = text_embeddings[0]
embedding_data = np.concatenate((image_embedding, text_embedding))
# image_embedding = np.array(image_embeddings[0])
# text_embedding = text_embeddings[0]
embedding_data = np.concatenate((image_embeddings, text_embeddings))
mm_type = 'mm'
elif len(image_embeddings) > 0:
image_embedding = np.array(image_embeddings[0])
Expand Down
28 changes: 16 additions & 12 deletions modelcache_mm/adapter/adapter_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,20 @@ def adapt_query(cache_data_convert, *args, **kwargs):
pre_embedding_image_raw = pre_embedding_data_dict['imageRaw']
pre_embedding_image_url = pre_embedding_data_dict['imageUrl']
pre_multi_type = pre_embedding_data_dict['multiType']
# print('pre_embedding_image_url: {}'.format(pre_embedding_image_url))
# print('pre_embedding_text: {}'.format(pre_embedding_text))

# 判断逻辑
if pre_multi_type == 'IMG_TEXT':
if pre_embedding_image_raw and pre_embedding_image_url:
raise ValueError(
"Both pre_embedding_imageUrl and pre_embedding_imageRaw cannot be non-empty at the same time.")
if pre_embedding_image_url:
url_start_time = time.time()
response = requests.get(pre_embedding_image_url)
image_data = response.content
pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
print('get_image_time: {}'.format(get_image_time))
# url_start_time = time.time()
# response = requests.get(pre_embedding_image_url)
# image_data = response.content
# pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
# get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
# print('get_image_time: {}'.format(get_image_time))
pre_embedding_image = pre_embedding_image_url
elif pre_embedding_image_raw:
pre_embedding_image = pre_embedding_image_raw
else:
Expand All @@ -63,7 +62,7 @@ def adapt_query(cache_data_convert, *args, **kwargs):
if cache_enable:
if pre_multi_type == 'IMG_TEXT':
embedding_data_resp = time_cal(
chat_cache.embedding_concurrent_func,
chat_cache.embedding_func,
func_name="iat_embedding",
report_func=chat_cache.report.embedding,
)(data_dict)
Expand All @@ -76,10 +75,15 @@ def adapt_query(cache_data_convert, *args, **kwargs):
image_embeddings = embedding_data_resp['image_embedding']
text_embeddings = embedding_data_resp['text_embeddings']

print('image_embeddings: {}'.format(image_embeddings))
print('image_embeddings_len: {}'.format(len(image_embeddings)))
print('text_embeddings: {}'.format(text_embeddings))
print('text_embeddings_len: {}'.format(len(text_embeddings)))

if len(image_embeddings) > 0 and len(image_embeddings) > 0:
image_embedding = np.array(image_embeddings[0])
text_embedding = np.array(text_embeddings[0])
embedding_data = np.concatenate((image_embedding, text_embedding))
# image_embedding = np.array(image_embeddings[0])
# text_embedding = np.array(text_embeddings[0])
embedding_data = np.concatenate((image_embeddings, text_embeddings))
mm_type = 'mm'
elif len(image_embeddings) > 0:
image_embedding = np.array(image_embeddings[0])
Expand Down
3 changes: 0 additions & 3 deletions modelcache_mm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def __init__(self):
self.query_pre_embedding_func = None
self.insert_pre_embedding_func = None
self.embedding_func = None
self.embedding_concurrent_func = None
self.data_manager: Optional[DataManager] = None
self.similarity_evaluation: Optional[SimilarityEvaluation] = None
self.post_process_messages_func = None
Expand All @@ -34,7 +33,6 @@ def init(
query_pre_embedding_func=None,
insert_pre_embedding_func=None,
embedding_func=string_embedding,
embedding_concurrent_func=string_embedding,
data_manager: DataManager = get_data_manager(),
similarity_evaluation=ExactMatchEvaluation(),
post_process_messages_func=first,
Expand All @@ -46,7 +44,6 @@ def init(
self.query_pre_embedding_func = query_pre_embedding_func
self.insert_pre_embedding_func = insert_pre_embedding_func
self.embedding_func = embedding_func
self.embedding_concurrent_func = embedding_concurrent_func
self.data_manager: DataManager = data_manager
self.similarity_evaluation = similarity_evaluation
self.post_process_messages_func = post_process_messages_func
Expand Down
31 changes: 3 additions & 28 deletions modelcache_mm/embedding/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,7 @@
# -*- coding: utf-8 -*-
from modelcache.utils.lazy_import import LazyImport
huggingface = LazyImport("huggingface", globals(), "modelcache.embedding.huggingface")
data2vec = LazyImport("data2vec", globals(), "modelcache.embedding.data2vec")
llmEmb = LazyImport("llmEmb", globals(), "modelcache.embedding.llmEmb")
fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
paddlenlp = LazyImport("paddlenlp", globals(), "modelcache.embedding.paddlenlp")
timm = LazyImport("timm", globals(), "modelcache.embedding.timm")
clip = LazyImport("clip", globals(), "modelcache_mm.embedding.clip")


def Huggingface(model="sentence-transformers/all-mpnet-base-v2"):
return huggingface.Huggingface(model)


def Data2VecAudio(model="facebook/data2vec-audio-base-960h"):
return data2vec.Data2VecAudio(model)


def LlmEmb2vecAudio():
return llmEmb.LlmEmb2Vec()


def FastText(model="en", dim=None):
return fasttext.FastText(model, dim)


def PaddleNLP(model="ernie-3.0-medium-zh"):
return paddlenlp.PaddleNLP(model)


def Timm(model="resnet50", device="default"):
return timm.Timm(model, device)
def Clip2Vec(model="damo/multi-modal_clip-vit-base-patch16_zh"):
    """Construct a ClipAudio embedding wrapper for *model*.

    The ClipAudio class is resolved through the module-level ``clip``
    LazyImport, so the heavy modelscope dependency is only loaded on
    first use.
    """
    embedding_cls = clip.ClipAudio
    return embedding_cls(model)
12 changes: 4 additions & 8 deletions modelcache/embedding/clip.py → modelcache_mm/embedding/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@


class ClipAudio(BaseEmbedding):
def __init__(self, model: str = "sentence-transformers/all-MiniLM-L6-v2"):
# current_dir = os.path.dirname(os.path.abspath(__file__))
# parent_dir = os.path.dirname(current_dir)
# model_dir = os.path.dirname(parent_dir)
# model = os.path.join(model_dir, 'model/text2vec-base-chinese/')

def __init__(self, model: str = 'damo/multi-modal_clip-vit-base-patch16_zh'):
self.model = model
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.clip_pipeline = pipeline(task=Tasks.multi_modal_embedding,
model='damo/multi-modal_clip-vit-base-patch16_zh', model_revision='v1.0.1')

model=model, model_revision='v1.0.1')
self.__dimension = 1024

def to_embeddings(self, data_dict, **_):
Expand Down

0 comments on commit e6cfcb8

Please sign in to comment.